[PATCH] expr: Fix up constant_byte_string bitfield handling [PR98366]

Jakub Jelinek jakub@redhat.com
Fri Dec 18 20:41:24 GMT 2020


Hi!

constant_byte_string now uses a convert_to_bytes function, which doesn't
handle bitfields at all (it doesn't punt on them, it just puts them into the
wrong bits or bytes).  Furthermore, I don't see a reason why that function
should exist at all, as it duplicates native_encode_initializer functionality.
Except that native_encode_initializer punted on flexible array members and 2
tests in the testsuite relied on constant_byte_string handling those.
So, this patch throws away convert_to_bytes and uses native_encode_initializer
instead, but teaches it to handle flexible array members (only in the
non-mask mode with off == -1 for now).  Furthermore, it adds various corner
case checks that the old implementation was missing (e.g. that STRING_CSTs
use int as length and therefore we shouldn't try to build strings larger
than that, or that the native_encode*/native_interpret* APIs require sane
host and target bytes (8-bit on both)).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-18  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/98366
	* fold-const.c (native_encode_initializer): Don't try to
	memset more than total_bytes with off == -1 even if len is large.
	Handle flexible array member initializers if off == -1 and mask is
	NULL.
	* expr.c (convert_to_bytes): Remove.
	(constant_byte_string): Use native_encode_initializer instead of
	convert_to_bytes.  Remove extraneous semicolon.  Punt on various
	corner-cases the APIs don't handle, like sizes > INT_MAX,
	BITS_PER_UNIT != 8, CHAR_BIT != 8.

	* gcc.c-torture/execute/pr98366.c: New test.

--- gcc/fold-const.c.jj	2020-12-09 09:36:05.017217418 +0100
+++ gcc/fold-const.c	2020-12-18 16:25:52.053997090 +0100
@@ -8197,7 +8197,7 @@ native_encode_initializer (tree init, un
 
 	  gcc_assert (TREE_CODE (type) == RECORD_TYPE || mask == NULL);
 	  if (ptr != NULL)
-	    memset (ptr, '\0', MIN (total_bytes - off, len));
+	    memset (ptr, '\0', MIN (total_bytes - o, len));
 	  for (cnt = 0; ; cnt++)
 	    {
 	      tree val = NULL_TREE, field = NULL_TREE;
@@ -8266,11 +8266,33 @@ native_encode_initializer (tree init, un
 	      if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
 		  && TYPE_DOMAIN (TREE_TYPE (field))
 		  && ! TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (field))))
-		return 0;
-	      if (DECL_SIZE_UNIT (field) == NULL_TREE
-		  || !tree_fits_shwi_p (DECL_SIZE_UNIT (field)))
-		return 0;
-	      fieldsize = tree_to_shwi (DECL_SIZE_UNIT (field));
+		{
+		  if (mask || off != -1)
+		    return 0;
+		  if (val == NULL_TREE)
+		    continue;
+		  if (TREE_CODE (TREE_TYPE (val)) != ARRAY_TYPE)
+		    return 0;
+		  fieldsize = int_size_in_bytes (TREE_TYPE (val));
+		  if (fieldsize < 0
+		      || (int) fieldsize != fieldsize
+		      || (pos + fieldsize) > INT_MAX)
+		    return 0;
+		  if (pos + fieldsize > total_bytes)
+		    {
+		      if (ptr != NULL && total_bytes - o < len)
+			memset (ptr + (total_bytes - o),
+				'\0', MIN (pos + fieldsize - o, len));
+		      total_bytes = pos + fieldsize;
+		    }
+		}
+	      else
+		{
+		  if (DECL_SIZE_UNIT (field) == NULL_TREE
+		      || !tree_fits_shwi_p (DECL_SIZE_UNIT (field)))
+		    return 0;
+		  fieldsize = tree_to_shwi (DECL_SIZE_UNIT (field));
+		}
 	      if (fieldsize == 0)
 		continue;
 
@@ -8439,12 +8461,31 @@ native_encode_initializer (tree init, un
 		  || (pos >= off
 		      && (pos + fieldsize <= (HOST_WIDE_INT) off + len)))
 		{
-		  if (!native_encode_initializer (val, ptr ? ptr + pos - o
-							   : NULL,
-						  fieldsize,
-						  off == -1 ? -1 : 0,
-						  mask ? mask + pos : NULL))
+		  int fldsize = fieldsize;
+		  if (off == -1)
+		    {
+		      tree fld = DECL_CHAIN (field);
+		      while (fld)
+			{
+			  if (TREE_CODE (fld) == FIELD_DECL)
+			    break;
+			  fld = DECL_CHAIN (fld);
+			}
+		      if (fld == NULL_TREE)
+			fldsize = len - pos;
+		    }
+		  r = native_encode_initializer (val, ptr ? ptr + pos - o
+							  : NULL,
+						 fldsize,
+						 off == -1 ? -1 : 0,
+						 mask ? mask + pos : NULL);
+		  if (!r)
 		    return 0;
+		  if (off == -1
+		      && fldsize != fieldsize
+		      && r > fieldsize
+		      && pos + r > total_bytes)
+		    total_bytes = pos + r;
 		}
 	      else
 		{
--- gcc/expr.c.jj	2020-12-11 11:10:08.520613827 +0100
+++ gcc/expr.c	2020-12-18 16:31:09.841374954 +0100
@@ -11631,111 +11631,6 @@ is_aligning_offset (const_tree offset, c
   return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
 }
 
-/* If EXPR is a constant initializer (either an expression or CONSTRUCTOR),
-   attempt to obtain its native representation as an array of nonzero BYTES.
-   Return true on success and false on failure (the latter without modifying
-   BYTES).  */
-
-static bool
-convert_to_bytes (tree type, tree expr, vec<unsigned char> *bytes)
-{
-  if (TREE_CODE (expr) == CONSTRUCTOR)
-    {
-      /* Set to the size of the CONSTRUCTOR elements.  */
-      unsigned HOST_WIDE_INT ctor_size = bytes->length ();
-
-      if (TREE_CODE (type) == ARRAY_TYPE)
-	{
-	  tree val, idx;
-	  tree eltype = TREE_TYPE (type);
-	  unsigned HOST_WIDE_INT elsize =
-	    tree_to_uhwi (TYPE_SIZE_UNIT (eltype));
-
-	  /* Jump through hoops to determine the lower bound for languages
-	     like Ada that can set it to an (almost) arbitrary value.  */
-	  tree dom = TYPE_DOMAIN (type);
-	  if (!dom)
-	    return false;
-	  tree min = TYPE_MIN_VALUE (dom);
-	  if (!min || !tree_fits_uhwi_p (min))
-	    return false;
-	  unsigned HOST_WIDE_INT i, last_idx = tree_to_uhwi (min) - 1;
-	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, idx, val)
-	    {
-	      /* Append zeros for elements with no initializers.  */
-	      if (!tree_fits_uhwi_p (idx))
-		return false;
-	      unsigned HOST_WIDE_INT cur_idx = tree_to_uhwi (idx);
-	      if (unsigned HOST_WIDE_INT size = cur_idx - (last_idx + 1))
-		{
-		  size = size * elsize + bytes->length ();
-		  bytes->safe_grow_cleared (size, true);
-		}
-
-	      if (!convert_to_bytes (eltype, val, bytes))
-		return false;
-
-	      last_idx = cur_idx;
-	    }
-	}
-      else if (TREE_CODE (type) == RECORD_TYPE)
-	{
-	  tree val, fld;
-	  unsigned HOST_WIDE_INT i;
-	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (expr), i, fld, val)
-	    {
-	      /* Append zeros for members with no initializers and
-		 any padding.  */
-	      unsigned HOST_WIDE_INT cur_off = int_byte_position (fld);
-	      if (bytes->length () < cur_off)
-		bytes->safe_grow_cleared (cur_off, true);
-
-	      if (!convert_to_bytes (TREE_TYPE (val), val, bytes))
-		return false;
-	    }
-	}
-      else
-	return false;
-
-      /* Compute the size of the COSNTRUCTOR elements.  */
-      ctor_size = bytes->length () - ctor_size;
-
-      /* Append zeros to the byte vector to the full size of the type.
-	 The type size can be less than the size of the CONSTRUCTOR
-	 if the latter contains initializers for a flexible array
-	 member.  */
-      tree size = TYPE_SIZE_UNIT (type);
-      unsigned HOST_WIDE_INT type_size = tree_to_uhwi (size);
-      if (ctor_size < type_size)
-	if (unsigned HOST_WIDE_INT size_grow = type_size - ctor_size)
-	  bytes->safe_grow_cleared (bytes->length () + size_grow, true);
-
-      return true;
-    }
-
-  /* Except for RECORD_TYPE which may have an initialized flexible array
-     member, the size of a type is the same as the size of the initializer
-     (including any implicitly zeroed out members and padding).  Allocate
-     just enough for that many bytes.  */
-  tree expr_size = TYPE_SIZE_UNIT (TREE_TYPE (expr));
-  if (!expr_size || !tree_fits_uhwi_p (expr_size))
-    return false;
-  const unsigned HOST_WIDE_INT expr_bytes = tree_to_uhwi (expr_size);
-  const unsigned bytes_sofar = bytes->length ();
-  /* native_encode_expr can convert at most INT_MAX bytes.  vec is limited
-     to at most UINT_MAX.  */
-  if (bytes_sofar + expr_bytes > INT_MAX)
-    return false;
-
-  /* Unlike for RECORD_TYPE, there is no need to clear the memory since
-     it's completely overwritten by native_encode_expr.  */
-  bytes->safe_grow (bytes_sofar + expr_bytes, true);
-  unsigned char *pnext = bytes->begin () + bytes_sofar;
-  int nbytes = native_encode_expr (expr, pnext, expr_bytes, 0);
-  /* NBYTES is zero on failure.  Otherwise it should equal EXPR_BYTES.  */
-  return (unsigned HOST_WIDE_INT) nbytes == expr_bytes;
-}
-
 /* Return a STRING_CST corresponding to ARG's constant initializer either
    if it's a string constant, or, when VALREP is set, any other constant,
    or null otherwise.
@@ -11748,7 +11643,7 @@ static tree
 constant_byte_string (tree arg, tree *ptr_offset, tree *mem_size, tree *decl,
 		      bool valrep = false)
 {
-  tree dummy = NULL_TREE;;
+  tree dummy = NULL_TREE;
   if (!mem_size)
     mem_size = &dummy;
 
@@ -11903,18 +11798,42 @@ constant_byte_string (tree arg, tree *pt
       if (!base_off.is_constant (&cstoff))
 	return NULL_TREE;
 
+      /* Check that the host and target are sane.  */
+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+	return NULL_TREE;
+
+      HOST_WIDE_INT typesz = int_size_in_bytes (TREE_TYPE (init));
+      if (typesz <= 0 || (int) typesz != typesz)
+	return NULL_TREE;
+
+      HOST_WIDE_INT size = typesz;
+      if (VAR_P (array)
+	  && DECL_SIZE_UNIT (array)
+	  && tree_fits_shwi_p (DECL_SIZE_UNIT (array)))
+	{
+	  size = tree_to_shwi (DECL_SIZE_UNIT (array));
+	  gcc_checking_assert (size >= typesz);
+	}
+
       /* If value representation was requested convert the initializer
 	 for the whole array or object into a string of bytes forming
 	 its value representation and return it.  */
-      auto_vec<unsigned char> bytes;
-      if (!convert_to_bytes (TREE_TYPE (init), init, &bytes))
-	return NULL_TREE;
+      unsigned char *bytes = XNEWVEC (unsigned char, size);
+      int r = native_encode_initializer (init, bytes, size);
+      if (r < typesz)
+	{
+	  XDELETEVEC (bytes);
+	  return NULL_TREE;
+	}
+
+      if (r < size)
+	memset (bytes + r, '\0', size - r);
 
-      unsigned n = bytes.length ();
-      const char *p = reinterpret_cast<const char *>(bytes.address ());
-      init = build_string_literal (n, p, char_type_node);
+      const char *p = reinterpret_cast<const char *>(bytes);
+      init = build_string_literal (size, p, char_type_node);
       init = TREE_OPERAND (init, 0);
       init = TREE_OPERAND (init, 0);
+      XDELETE (bytes);
 
       *mem_size = size_int (TREE_STRING_LENGTH (init));
       *ptr_offset = wide_int_to_tree (ssizetype, base_off);
@@ -11965,6 +11884,10 @@ constant_byte_string (tree arg, tree *pt
       && (TREE_CODE (TREE_TYPE (array)) == INTEGER_TYPE
 	  || TYPE_MAIN_VARIANT (inittype) == char_type_node))
     {
+      /* Check that the host and target are sane.  */
+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+	return NULL_TREE;
+
       /* For a reference to (address of) a single constant character,
 	 store the native representation of the character in CHARBUF.
 	 If the reference is to an element of an array or a member
@@ -12007,6 +11930,9 @@ constant_byte_string (tree arg, tree *pt
 	    initsize = integer_zero_node;
 
 	  unsigned HOST_WIDE_INT size = tree_to_uhwi (initsize);
+	  if (size > (unsigned HOST_WIDE_INT) INT_MAX)
+	    return NULL_TREE;
+
 	  init = build_string_literal (size, NULL, chartype, size);
 	  init = TREE_OPERAND (init, 0);
 	  init = TREE_OPERAND (init, 0);
--- gcc/testsuite/gcc.c-torture/execute/pr98366.c.jj	2020-12-18 14:46:43.665716370 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr98366.c	2020-12-18 14:50:12.732338089 +0100
@@ -0,0 +1,13 @@
+/* PR middle-end/98366 */
+/* { dg-require-effective-target int32 } */
+
+typedef struct S { int a, b, c : 7, d : 8, e : 17; } S;
+const S f[] = { {0, 3, 4, 2, 0} };
+
+int
+main ()
+{
+  if (__builtin_memcmp (f, (S[]){{.b = 3, .c = 4, .d = 2, .e = 0}}, sizeof (S)))
+    __builtin_abort ();
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list