This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof


On 10/16/2015 06:27 AM, Bernd Schmidt wrote:
On 10/09/2015 04:55 AM, Martin Sebor wrote:
Gcc attempts to diagnose invalid offsetof expressions whose member
designator is an array element with an out-of-bounds index. The
logic in the function that does this detection is incomplete, leading
to false negatives. Since the result of the expression in these cases
can be surprising, this patch tightens up the logic to diagnose more
such cases.

Thank you for the review. Attached is an updated patch that hopefully
addresses all your comments. I ran the check_GNU_style.sh script on
it to make sure I didn't miss something.  I've also added replies to
a few of your comments below.


In the future, please explain more clearly in the patch submission what
the false negatives are. That'll make the reviewer's job easier.

The false negatives are at a high level explained in the bug:

  GCC fails to diagnose cases of invalid offsetof expressions whose
  member designator refers to an array element past the end of the
  array plus one.

The example there is intended to illustrate the general problem.
Beyond that, the test included in the patch shows other examples.
With an unpatched GCC, the latest one shows failures on the
following lines:

  128, 129, 134, 141, 142, 148, 149, 155, 156, 157, 158, 159, 163,
  164, 167, 168, 169, 170, 171, 181, 182, 183, 204, 205, 206, and
  207.

Hopefully that along with the comments in the code makes the problem
clear enough but I'd be happy to add more of either if that helps.

Tested by boostrapping and running c/c++ tests on x86_64 with no
regressions.

Should run the full testsuite (standard practice, and library tests
might have occurrences of offsetof).

Yes, that is what I meant by running c/c+ tests (i.e., I configured
gcc with --enable-languages=c,c++, bootstrapped it, and ran make
check).

+          /* Index is considered valid when it's either less than
+             ...
I admit to having trouble parsing this comment. Can you write that in a
clearer way somehow? I'm still trying to make my mind up whether the
logic in this patch could be simplified.

I've reworded and expanded the comment in the updated patch. Please
let me know if it's still unclear.

So I checked and it looks like we accept flexible array member syntax
like "int a[][2];", which suggests that the test might have the right
idea, but has the indices swapped (the first one is the flexible one)?
Ccing Joseph for a ruling.

I believe the test is in line with Joseph's expectation. I added
a few more test cases to cover the constructs he referred to in
his response (IIUC).

Martin
gcc/ChangeLog
2015-10-16  Martin Sebor  <msebor@redhat.com>

	PR c++-common/67882
	* c-family/c-common.h (struct offsetof_ctx_t): Declare.
	(fold_offsetof_1): Add argument.
	* c-family/c-common.c (struct offsetof_ctx_t): Define.
	(fold_offsetof_1): Diagnose more invalid offsetof expressions
	that reference elements past the end of an array.

testsuite/ChangeLog
2015-10-16  Martin Sebor  <msebor@redhat.com>

	PR c++-common/67882
	* c-c++-common/builtin-offsetof-2.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 4b922bf..c313a78 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10535,13 +10535,38 @@ c_common_to_target_charset (HOST_WIDE_INT c)
 }

 /* Fold an offsetof-like expression.  EXPR is a nested sequence of component
-   references with an INDIRECT_REF of a constant at the bottom; much like the
-   traditional rendering of offsetof as a macro.  Return the folded result.  */
+   references with an INDIRECT_REF of a constant at the bottom; much like
+   the traditional rendering of offsetof as a macro.  Return the folded
+   result.  PCTX, which is initially null, is intended only for internal
+   use by the function.  It is set by the first recursive invocation of
+   the function to refer to a local object describing the potentially
+   out-of-bounds index of the array member whose offset is being computed,
+   and to indicate whether all indices to the same array object have
+   the highest valid value.  The function issues a warning for out-of-
+   bounds array indices that either refer to elements past the one just
+   past the end of the array object or that exceed any of the major
+   bounds.  */
+
+struct offsetof_ctx_t
+{
+  /* The possibly invalid array index or NULL_TREE.  */
+  tree index;
+  /* Clear when no index to the (possibly multi-dimensional) array
+     is known to have the same value as the corresponding upper bound
+     minus one.  Negative when unknown/don't care, positive otherwise.  */
+  int max_index;
+};

 tree
-fold_offsetof_1 (tree expr)
+fold_offsetof_1 (tree expr, offsetof_ctx_t *pctx /* = 0 */)
 {
   tree base, off, t;
+  offsetof_ctx_t ctx = { NULL_TREE, -1 };
+
+  /* Set the context pointer to point to the local context object
+     to use by subsequent recursive calls.  */
+  if (!pctx)
+    pctx = &ctx;

   switch (TREE_CODE (expr))
     {
@@ -10567,10 +10592,19 @@ fold_offsetof_1 (tree expr)
       return TREE_OPERAND (expr, 0);

     case COMPONENT_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;

+      if (ctx.index != NULL_TREE)
+	{
+	  warning (OPT_Warray_bounds,
+		   "index %E denotes an offset greater than size of %qT",
+		   ctx.index, TREE_TYPE (TREE_OPERAND (expr, 0)));
+	  /* Reset to avoid diagnosing the same expression multiple times.  */
+	  pctx->index = NULL_TREE;
+	}
+
       t = TREE_OPERAND (expr, 1);
       if (DECL_C_BIT_FIELD (t))
 	{
@@ -10581,10 +10615,15 @@ fold_offsetof_1 (tree expr)
       off = size_binop_loc (input_location, PLUS_EXPR, DECL_FIELD_OFFSET (t),
 			    size_int (tree_to_uhwi (DECL_FIELD_BIT_OFFSET (t))
 				      / BITS_PER_UNIT));
+      /* At this point we've either diagnosed a prior out of bounds array
+	 index above, or the array index is valid (or one hasn't been yet
+	 seen).  Reset MAX_INDEX to unknown/don't care and start from
+	 scratch.  */
+      pctx->max_index = -1;
       break;

     case ARRAY_REF:
-      base = fold_offsetof_1 (TREE_OPERAND (expr, 0));
+      base = fold_offsetof_1 (TREE_OPERAND (expr, 0), pctx);
       if (base == error_mark_node)
 	return base;

@@ -10603,6 +10642,8 @@ fold_offsetof_1 (tree expr)
 				    build_int_cst (TREE_TYPE (upbound), 1));
 	      if (tree_int_cst_lt (upbound, t))
 		{
+		  pctx->index = t;
+
 		  tree v;

 		  for (v = TREE_OPERAND (expr, 0);
@@ -10622,14 +10663,77 @@ fold_offsetof_1 (tree expr)
 		  /* Don't warn if the array might be considered a poor
 		     man's flexible array member with a very permissive
 		     definition thereof.  */
-		  if (TREE_CODE (v) == ARRAY_REF
-		      || TREE_CODE (v) == COMPONENT_REF)
+		  if (TREE_CODE (v) != ARRAY_REF
+		      && TREE_CODE (v) != COMPONENT_REF)
+		    pctx = NULL;
+		}
+	      else if (pctx->index == NULL_TREE
+		       && tree_int_cst_equal (upbound, t))
+		{
+		  /* The value of an index into an array is considered valid
+		     under any of the following conditions:
+
+		     1) it's less than the upper bound of the corresponding
+		     dimension of the array (where the upper bound is the
+		     size of the dimension, such as 3 in A[3]), or
+
+		     2) as a GCC extension, it's equal to the upper bound
+		     and the (non-existent, just-past-the-end) element it
+		     refers to is not used to reference a member of the
+		     element's type, or
+
+		     3) as a GCC extension, it's equal to the upper bound
+		     of the lower dimension of a flexible array member.
+
+		     For example, given struct S containing the member
+		     A[3][7], the expression offsetof (S, A[2][7]) is
+		     considered valid since it computes the equivalent
+		     of the address of A's last element plus one.  Given
+		     an object of struct S named s, this expression yields
+		     the same result as (char*)&s.A[2][7] - (char*)&s,
+		     which is a valid expression.
+		     On the other hand, offsetof (S, A[3]0]) is not valid,
+		     and neither is offsetof (S, A[2][7].foo).
+
+		     When an index is found to be equal to the upper bound
+		     of the lowest dimension, it is not known at this point
+		     in the recursive invocation of the function whether
+		     the offsetof expression is used to compute the offset
+		     of the just-past-the-end element at the index, or to
+		     designate a member of the type to which the just-past-
+		     the-end element refers (as in offsetof(S, A[2][7].foo)).
+		     Therefore, point the INDEX variable at the index and
+		     leave it up to the caller to decide whether or not
+		     to diagnose it.
+
+		     Special handling is also required for minor index
+		     values referring to the element just past the end
+		     of the array object.  */
+		  pctx->index = t;
+		  tree_code lastcode = TREE_CODE (TREE_OPERAND (expr, 0));
+		  if ((lastcode != ARRAY_REF && pctx != &ctx)
+		      || (pctx == &ctx && pctx->max_index))
+		    pctx = NULL;
+		}
+	      else
+		{
+		  HOST_WIDE_INT ubi = tree_to_shwi (upbound);
+		  HOST_WIDE_INT idx = tree_to_shwi (t);
+
+		  if (pctx->max_index)
+		    pctx->max_index = idx + 1 == ubi;
+		}
+	    }
+	}
+
+      if (pctx && pctx->index)
+	{
 	  warning (OPT_Warray_bounds,
 		   "index %E denotes an offset "
 		   "greater than size of %qT",
-			     t, TREE_TYPE (TREE_OPERAND (expr, 0)));
-		}
-	    }
+		   pctx->index, TREE_TYPE (TREE_OPERAND (expr, 0)));
+
+	  pctx->index = NULL_TREE;
 	}

       t = convert (sizetype, t);
@@ -10640,7 +10744,7 @@ fold_offsetof_1 (tree expr)
       /* Handle static members of volatile structs.  */
       t = TREE_OPERAND (expr, 1);
       gcc_assert (VAR_P (t));
-      return fold_offsetof_1 (t);
+      return fold_offsetof_1 (t, pctx);

     default:
       gcc_unreachable ();
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 74d1bc1..a4ccf2f 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1015,7 +1015,8 @@ extern bool c_dump_tree (void *, tree);

 extern void verify_sequence_points (tree);

-extern tree fold_offsetof_1 (tree);
+struct offsetof_ctx_t;
+extern tree fold_offsetof_1 (tree, offsetof_ctx_t * = NULL);
 extern tree fold_offsetof (tree);

 /* Places where an lvalue, or modifiable lvalue, may be required.
diff --git a/gcc/testsuite/c-c++-common/builtin-offsetof-2.c b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
new file mode 100644
index 0000000..490ecf1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/builtin-offsetof-2.c
@@ -0,0 +1,212 @@
+// { dg-options "-Warray-bounds" }
+// { dg-do compile }
+
+// Test case exercising pr c/67882 - surprising offsetof result
+//   on an invalid array member without diagnostic.
+
+typedef struct A1 {
+  char a1[1];
+  char c;
+} A1;
+
+typedef struct A1_x_2 {
+  char a1[1];
+  char a[][2];
+} A1_x_2;
+
+typedef struct A1_1_x {
+  char a1_1[1][1];
+  char a[];
+} A1_1_x;
+
+typedef struct Ax_2_3 {
+  int i;
+  char a_x_2_3[][2][3];
+} Ax_2_3;
+
+typedef struct A1_1 {
+  char a1_1[1][1];
+  char c;
+} A1_1;
+
+typedef struct B {
+  A1_1 a2_3[2][3];
+  char a1_1[3][5];
+  char a[];
+} B;
+
+// Structures with members that contain flexible array members are
+// an extension accepted by GCC.
+typedef struct C {
+  A1_1_x a5_7 [5][7];
+  int a;
+} C;
+
+// Structs with a "fake" flexible array member (a GCC extension).
+typedef struct FA0 {
+  int i;
+  char a0 [0];
+} FA0;
+
+typedef struct FA1 {
+  int i;
+  char a1 [1];
+} FA1;
+
+typedef struct FA3 {
+  int i;
+  char a3 [3];
+} FA3;
+
+// A "fake" multidimensional flexible array member deserves special
+// treatment since the offsetof exression yields the same result for
+// references to apparently distinct members (e.g., a5_7[0][7] is
+// at the same offset as a5_7[1][0] which may be surprising).
+typedef struct FA5_7 {
+  int i;
+  char a5_7 [5][7];
+} FA5_7;
+
+static void test (void)
+{
+  // Verify that offsetof references to array elements past the end of
+  // the array member are diagnosed.  As an extension, permit references
+  // to the element just past-the-end of the array.
+
+  int a[] = {
+    __builtin_offsetof (A1, a1),                 // valid
+    __builtin_offsetof (A1, a1 [0]),             // valid
+
+    // The following expression is silently accepted as an extension
+    // because it simply forms the equivalent of an address pointing
+    // just past the last element of the array and forming such an
+    // adderess is valid in C, even though referring to the element
+    // in an offsetof expression is strictly not.
+    __builtin_offsetof (A1, a1 [1]),             // valid
+    __builtin_offsetof (A1, a1 [2]),             // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a1),             // valid
+    __builtin_offsetof (A1_x_2, a1 [0]),         // valid
+    __builtin_offsetof (A1_x_2, a1 [1]),         // extension
+    __builtin_offsetof (A1_x_2, a1 [2]),         // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0]),          // valid
+    __builtin_offsetof (A1_x_2, a [1]),          // valid
+    __builtin_offsetof (A1_x_2, a [99]),         // valid
+
+    __builtin_offsetof (A1_x_2, a),              // valid
+    __builtin_offsetof (A1_x_2, a [0][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [0][1]),       // valid
+
+    // The following is silently accepted because A is a flexible array
+    // member with the type of T[][2] and we can't tell what its runtime
+    // major dimension is.  If it happens to be 1 then A[0][2] refers
+    // to just-past-the-last-element which is accepted as an extension.
+    // If it's greater than 1 then we fail to diagnose this case as
+    // an unavoidable false neagtive.
+    __builtin_offsetof (A1_x_2, a [0][2]),       // extension
+
+    // Unlike the case above, this is clearly invalid since it refers
+    // to an element past one one just-past-the-end in A[][2].
+    __builtin_offsetof (A1_x_2, a [0][3]),       // { dg-warning "index" }
+
+    __builtin_offsetof (A1_x_2, a [1][0]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][1]),       // valid
+    __builtin_offsetof (A1_x_2, a [1][2]),       // extension (see above)
+    __builtin_offsetof (A1_x_2, a [99][0]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][1]),      // valid
+    __builtin_offsetof (A1_x_2, a [99][2]),      // extension (see above)
+
+    __builtin_offsetof (A1_1_x, a),              // valid
+    __builtin_offsetof (A1_1_x, a [0]),          // valid
+    __builtin_offsetof (A1_1_x, a [1]),          // valid
+    __builtin_offsetof (A1_1_x, a [99]),         // valid
+
+    __builtin_offsetof (A1_1_x, a1_1 [0][0]),    // valid
+    __builtin_offsetof (A1_1_x, a1_1 [0][1]),    // extension
+    __builtin_offsetof (A1_1_x, a1_1 [1][0]),    // { dg-warning "index" }
+    __builtin_offsetof (A1_1_x, a1_1 [1][1]),    // { dg-warning "index" }
+
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][3]),  // extension
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][1][4]),  // { dg-warning "index" }
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2]),     // extension
+    __builtin_offsetof (Ax_2_3, a_x_2_3 [0][2][0]),  // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [0][0].c),           // valid
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][0]), // valid
+    __builtin_offsetof (B, a2_3 [1][3]),             // extension
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [0][1]), // extension
+
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [0][0].a1_1 [1][1]), // { dg-warning "index" }
+
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][0]), // valid
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [0][1]), // extension
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][2].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Forming an offset to the just-past-end element is accepted.
+    __builtin_offsetof (B, a2_3 [1][3]),             // exension
+    // ...but these are diagnosed because they dereference a just-path-the-end
+    // element.
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].a1_1 [1][1]), // { dg-warning "index" }
+
+    // Analogous to the case above, these are both diagnosed because they
+    // dereference just-path-the-end elements of the a2_3 array.
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [1][3].c),       // { dg-warning "index" }
+
+    // The following are all invalid because of the reference to a2_3[2].
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [0][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][0]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].a1_1 [1][1]), // { dg-warning "index" }
+    __builtin_offsetof (B, a2_3 [2][0].c),           // { dg-warning "index" }
+
+    __builtin_offsetof (C, a5_7 [4][6]),
+    __builtin_offsetof (C, a5_7 [4][6].a),
+    __builtin_offsetof (C, a5_7 [4][6].a [0]),
+    __builtin_offsetof (C, a5_7 [4][6].a [99]),
+
+    __builtin_offsetof (C, a5_7 [4][7]),             // extension
+    // Diagnose the following even though the object whose offset is
+    // computed is a flexible array member.
+    __builtin_offsetof (C, a5_7 [4][7].a),           // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [0]),       // { dg-warning "index" }
+    __builtin_offsetof (C, a5_7 [4][7].a [99]),      // { dg-warning "index" }
+
+    // Verify that no diagnostic is issued for offsetof expressions
+    // involving structs where the array has a rank of 1 and is the last
+    // member (e.g., those are treated as flexible array members).
+    __builtin_offsetof (FA0, a0 [0]),
+    __builtin_offsetof (FA0, a0 [1]),
+    __builtin_offsetof (FA0, a0 [99]),
+
+    __builtin_offsetof (FA1, a1 [0]),
+    __builtin_offsetof (FA1, a1 [1]),
+    __builtin_offsetof (FA1, a1 [99]),
+
+    __builtin_offsetof (FA3, a3 [0]),
+    __builtin_offsetof (FA3, a3 [3]),
+    __builtin_offsetof (FA3, a3 [99]),
+
+    __builtin_offsetof (FA5_7, a5_7 [0][0]),
+
+    // Unlike one-dimensional arrays, verify that out-of-bounds references
+    // to arrays with rank of 2 and greater are diagnosed.
+    __builtin_offsetof (FA5_7, a5_7 [0][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [1][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][0]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [5][7]),         // { dg-warning "index" }
+    __builtin_offsetof (FA5_7, a5_7 [99][99])        // { dg-warning "index" }
+  };
+
+  (void)&a;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]