This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Add a character size parameter to c_strlen/get_range_strlen


On 08/14/2018 04:25 AM, Bernd Edlinger wrote:
> 2018-08-14  Bernd Edlinger  <bernd.edlinger@hotmail.de>
> 
> 	* builtins.c (c_strlen): Add new parameter eltsize.
> 	* builtins.h (c_strlen): Adjust prototype.
> 	* expr.c (string_constant): Add new parameter mem_size.
> 	* expr.h (string_constant): Adjust prototype.
> 	* gimple-fold.c (get_range_strlen): Add new parameter eltsize.
> 	* gimple-fold.h (get_range_strlen): Adjust prototype.
> 	* gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize.
> 	(format_string): Call get_string_length with eltsize.
So as I mentioned downthread, I made some minor tweaks.  Essentially
disentangling this from the prior patch to fix 86711/86714.  I don't
believe my changes affect this patch in any material way.

The key take-away from this patch is that callers of c_strlen now pass
to c_strlen how to count elements (1-byte, 2-byte or 4-byte chunks).
All the callers except the sprintf warning code count single bytes, which
restores the previous behavior.  The sprintf bits want to count element
sized chunks, which for wchars is 4 bytes (that count will then be
adjusted for multibyte-chars during analysis).

The patch requires no testsuite changes.

Installing on the trunk.

Jeff
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 6716aab..b1a79f3 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -568,41 +568,43 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
    accesses.  Note that this implies the result is not going to be emitted
    into the instruction stream.
 
-   The value returned is of type `ssizetype'.
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.
 
-   Unfortunately, string_constant can't access the values of const char
-   arrays with initializers, so neither can we do so here.  */
+   The value returned is of type `ssizetype'.  */
 
 tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, unsigned eltsize)
 {
+  gcc_assert (eltsize == 1 || eltsize == 2 || eltsize == 4);
   STRIP_NOPS (src);
   if (TREE_CODE (src) == COND_EXPR
       && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
     {
       tree len1, len2;
 
-      len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
-      len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+      len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
+      len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize);
       if (tree_int_cst_equal (len1, len2))
 	return len1;
     }
 
   if (TREE_CODE (src) == COMPOUND_EXPR
       && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
-    return c_strlen (TREE_OPERAND (src, 1), only_value);
+    return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
 
   location_t loc = EXPR_LOC_OR_LOC (src, input_location);
 
   /* Offset from the beginning of the string in bytes.  */
   tree byteoff;
-  src = string_constant (src, &byteoff);
+  tree memsize;
+  src = string_constant (src, &byteoff, &memsize);
   if (src == 0)
     return NULL_TREE;
 
   /* Determine the size of the string element.  */
-  unsigned eltsize
-    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src))));
+  if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))))
+    return NULL_TREE;
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
      length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
@@ -613,14 +615,10 @@ c_strlen (tree src, int only_value)
   HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
   strelts = strelts / eltsize - 1;
 
-  HOST_WIDE_INT maxelts = strelts;
-  tree type = TREE_TYPE (src);
-  if (tree size = TYPE_SIZE_UNIT (type))
-    if (tree_fits_shwi_p (size))
-      {
-	maxelts = tree_to_uhwi (size);
-	maxelts = maxelts / eltsize - 1;
-      }
+  if (!tree_fits_uhwi_p (memsize))
+    return NULL_TREE;
+
+  HOST_WIDE_INT maxelts = tree_to_uhwi (memsize) / eltsize - 1;
 
   /* PTR can point to the byte representation of any string type, including
      char* and wchar_t*.  */
@@ -628,19 +626,23 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
     {
+      /* For empty strings the result should be zero.  */
+      if (maxelts == 0)
+	return ssize_int (0);
+
+      /* The code below works only for single byte character types.  */
+      if (eltsize != 1)
+	return NULL_TREE;
+
       /* If the string has an internal NUL character followed by any
 	 non-NUL characters (e.g., "foo\0bar"), we can't compute
 	 the offset to the following NUL if we don't know where to
 	 start searching for it.  */
       unsigned len = string_length (ptr, eltsize, strelts);
-      if (len < strelts)
-	{
-	  /* Return when an embedded null character is found.  */
-	  return NULL_TREE;
-	}
 
-      if (!maxelts)
-	return ssize_int (0);
+      /* Return when an embedded null character is found or none at all.  */
+      if (len < strelts || len > maxelts)
+	return NULL_TREE;
 
       /* We don't know the starting offset, but we do know that the string
 	 has no internal zero bytes.  If the offset falls within the bounds
@@ -650,8 +652,8 @@ c_strlen (tree src, int only_value)
       tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : byteoff;
       offsave = fold_convert (ssizetype, offsave);
       tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
-				      build_int_cst (ssizetype, len * eltsize));
-      tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), offsave);
+				      build_int_cst (ssizetype, len));
+      tree lenexp = size_diffop_loc (loc, ssize_int (strelts), offsave);
       return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
 			      build_zero_cst (ssizetype));
     }
@@ -684,6 +686,11 @@ c_strlen (tree src, int only_value)
       return NULL_TREE;
     }
 
+  /* If eltoff is larger than strelts but less than maxelts the
+     string length is zero, since the excess memory will be zero.  */
+  if (eltoff > strelts)
+    return ssize_int (0);
+
   /* Use strlen to search for the first zero byte.  Since any strings
      constructed with build_string will have nulls appended, we win even
      if we get handed something like (char[4])"abcd".
@@ -691,7 +698,7 @@ c_strlen (tree src, int only_value)
      Since ELTOFF is our starting index into the string, no further
      calculation is needed.  */
   unsigned len = string_length (ptr + eltoff * eltsize, eltsize,
-				maxelts - eltoff);
+				strelts - eltoff);
 
   return ssize_int (len);
 }
diff --git a/gcc/builtins.h b/gcc/builtins.h
index 1113bd3..805f180 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -58,7 +58,7 @@ extern bool get_pointer_alignment_1 (tree, unsigned int *,
 				     unsigned HOST_WIDE_INT *);
 extern unsigned int get_pointer_alignment (tree);
 extern unsigned string_length (const void*, unsigned, unsigned);
-extern tree c_strlen (tree, int);
+extern tree c_strlen (tree, int, unsigned = 1);
 extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
diff --git a/gcc/expr.c b/gcc/expr.c
index de6709d..1a8b608 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11271,10 +11271,12 @@ is_aligning_offset (const_tree offset, const_tree exp)
 /* Return the tree node if an ARG corresponds to a string constant or zero
    if it doesn't.  If we return nonzero, set *PTR_OFFSET to the (possibly
    non-constant) offset in bytes within the string that ARG is accessing.
-   The type of the offset is sizetype.  */
+   The type of the offset is sizetype.  If MEM_SIZE is non-zero the storage
+   size of the memory is returned.  If MEM_SIZE is zero, the string is
+   only returned when it is properly zero terminated.  */
 
 tree
-string_constant (tree arg, tree *ptr_offset)
+string_constant (tree arg, tree *ptr_offset, tree *mem_size)
 {
   tree array;
   STRIP_NOPS (arg);
@@ -11328,7 +11330,7 @@ string_constant (tree arg, tree *ptr_offset)
 	return NULL_TREE;
 
       tree offset;
-      if (tree str = string_constant (arg0, &offset))
+      if (tree str = string_constant (arg0, &offset, mem_size))
 	{
 	  /* Avoid pointers to arrays (see bug 86622).  */
 	  if (POINTER_TYPE_P (TREE_TYPE (arg))
@@ -11368,6 +11370,8 @@ string_constant (tree arg, tree *ptr_offset)
   if (TREE_CODE (array) == STRING_CST)
     {
       *ptr_offset = fold_convert (sizetype, offset);
+      if (mem_size)
+	*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
       return array;
     }
 
@@ -11427,7 +11431,9 @@ string_constant (tree arg, tree *ptr_offset)
   unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
   length = string_length (TREE_STRING_POINTER (init), charsize,
 			  length / charsize);
-  if (compare_tree_int (array_size, length + 1) < 0)
+  if (mem_size)
+    *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
+  else if (compare_tree_int (array_size, length + 1) < 0)
     return NULL_TREE;
 
   *ptr_offset = offset;
diff --git a/gcc/expr.h b/gcc/expr.h
index cf047d4..d4d2564 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -288,7 +288,7 @@ expand_normal (tree exp)
 
 /* Return the tree node and offset if a given argument corresponds to
    a string constant.  */
-extern tree string_constant (tree, tree *);
+extern tree string_constant (tree, tree *, tree * = NULL);
 
 /* Two different ways of generating switch statements.  */
 extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability);
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 42673b5..07341eb 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -1275,11 +1275,13 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len)
    Set *FLEXP to true if the range of the string lengths has been
    obtained from the upper bound of an array at the end of a struct.
    Such an array may hold a string that's longer than its upper bound
-   due to it being used as a poor-man's flexible array member.  */
+   due to it being used as a poor-man's flexible array member.
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.  */
 
 static bool
 get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
-		  int fuzzy, bool *flexp)
+		  int fuzzy, bool *flexp, unsigned eltsize = 1)
 {
   tree var, val = NULL_TREE;
   gimple *def_stmt;
@@ -1300,8 +1302,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
 	      tree aop0 = TREE_OPERAND (op, 0);
 	      if (TREE_CODE (aop0) == INDIRECT_REF
 		  && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME)
-		return get_range_strlen (TREE_OPERAND (aop0, 0),
-					 length, visited, type, fuzzy, flexp);
+		return get_range_strlen (TREE_OPERAND (aop0, 0), length,
+					 visited, type, fuzzy, flexp, eltsize);
 	    }
 	  else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy)
 	    {
@@ -1329,13 +1331,13 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
 	    return false;
 	}
       else
-	val = c_strlen (arg, 1);
+	val = c_strlen (arg, 1, eltsize);
 
       if (!val && fuzzy)
 	{
 	  if (TREE_CODE (arg) == ADDR_EXPR)
 	    return get_range_strlen (TREE_OPERAND (arg, 0), length,
-				     visited, type, fuzzy, flexp);
+				     visited, type, fuzzy, flexp, eltsize);
 
 	  if (TREE_CODE (arg) == ARRAY_REF)
 	    {
@@ -1477,7 +1479,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
             || gimple_assign_unary_nop_p (def_stmt))
           {
             tree rhs = gimple_assign_rhs1 (def_stmt);
-	    return get_range_strlen (rhs, length, visited, type, fuzzy, flexp);
+	    return get_range_strlen (rhs, length, visited, type, fuzzy, flexp,
+				     eltsize);
           }
 	else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
 	  {
@@ -1486,7 +1489,7 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
 
 	    for (unsigned int i = 0; i < 2; i++)
 	      if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
-				     flexp))
+				     flexp, eltsize))
 		{
 		  if (fuzzy == 2)
 		    *maxlen = build_all_ones_cst (size_type_node);
@@ -1513,7 +1516,8 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
             if (arg == gimple_phi_result (def_stmt))
               continue;
 
-	    if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
+	    if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp,
+				   eltsize))
 	      {
 		if (fuzzy == 2)
 		  *maxlen = build_all_ones_cst (size_type_node);
@@ -1545,10 +1549,13 @@ get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
    and false if PHIs and COND_EXPRs are to be handled optimistically,
    if we can determine string length minimum and maximum; it will use
    the minimum from the ones where it can be determined.
-   STRICT false should be only used for warning code.  */
+   STRICT false should be only used for warning code.
+
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.  */
 
 bool
-get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
+get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict)
 {
   bitmap visited = NULL;
 
@@ -1557,7 +1564,7 @@ get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
 
   bool flexarray = false;
   if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
-			 &flexarray))
+			 &flexarray, eltsize))
     {
       minmaxlen[0] = NULL_TREE;
       minmaxlen[1] = NULL_TREE;
@@ -3500,7 +3507,7 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi)
   wide_int maxlen;
 
   tree lenrange[2];
-  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true)
+  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true)
       && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
       && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
     {
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 04e9bfa..e3fad83 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
 extern tree canonicalize_constructor_val (tree, tree);
 extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2], bool = false);
+extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false);
 extern tree get_maxval_strlen (tree, int);
 extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
 extern bool fold_stmt (gimple_stmt_iterator *);
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index 5213e17..2431b52 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -2125,12 +2125,12 @@ format_floating (const directive &dir, tree arg, vr_values *)
    Used by the format_string function below.  */
 
 static fmtresult
-get_string_length (tree str)
+get_string_length (tree str, unsigned eltsize)
 {
   if (!str)
     return fmtresult ();
 
-  if (tree slen = c_strlen (str, 1))
+  if (tree slen = c_strlen (str, 1, eltsize))
     {
       /* Simply return the length of the string.  */
       fmtresult res (tree_to_shwi (slen));
@@ -2143,7 +2143,7 @@ get_string_length (tree str)
      aren't known to point any such arrays result in LENRANGE[1] set
      to SIZE_MAX.  */
   tree lenrange[2];
-  bool flexarray = get_range_strlen (str, lenrange);
+  bool flexarray = get_range_strlen (str, lenrange, eltsize);
 
   if (lenrange [0] || lenrange [1])
     {
@@ -2195,7 +2195,7 @@ get_string_length (tree str)
       return res;
     }
 
-  return get_string_length (NULL_TREE);
+  return fmtresult ();
 }
 
 /* Return the minimum and maximum number of characters formatted
@@ -2274,7 +2274,8 @@ format_string (const directive &dir, tree arg, vr_values *)
   fmtresult res;
 
   /* Compute the range the argument's length can be in.  */
-  fmtresult slen = get_string_length (arg);
+  int count_by = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1;
+  fmtresult slen = get_string_length (arg, count_by);
   if (slen.range.min == slen.range.max
       && slen.range.min < HOST_WIDE_INT_MAX)
     {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]