This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR/8268: implement compile time array subscript checking


On 12/6/06, Dirk Mueller <dmuell@gmx.net> wrote:

Hi,


The patch below implements PR/8268, which seems to be the one major diagnostic
we're missing compared to icc (at least one openSUSE contributor regularly
rebuilds all of the openSUSE distribution with icc just to report those
warnings as bugs to us ;) ).

bootstrapped and regtested with no additional failures many times on
i686-suse-linux.

Ok? Do we need the extra -Warray-bounds?

Yes, we need the extra -Warray-bounds for languages that don't include it in -Wall and to turn the warning off.


:ADDPATCH diagnostic:


2006-12-06  Dirk Mueller  <dmueller@suse.de>
            Richard Guenther <rguenther@suse.de>

        PR diagnostic/8268
        * doc/invoke.texi (Warray-bounds): Document -Warray-bounds.
        * common.opt (Warray-bounds): Add new warning option.
        * c-opts.c (c_common_handle_option): Define -Warray-bounds
        if -Wall is given.
        * tree-vrp.c (vrp_finalize): Call check_array_refs if -Warray-bounds
        is enabled.
        (check_array_refs, check_array_bounds, check_array_ref): New.

* testsuite/gcc.dg/Warray-bounds.c: New testcase.

--- doc/invoke.texi     (revision 119391)
+++ doc/invoke.texi     (working copy)
@@ -244,7 +244,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wreturn-type  -Wsequence-point  -Wshadow @gol
 -Wsign-compare  -Wstack-protector @gol
 -Wstrict-aliasing -Wstrict-aliasing=2 @gol
--Wstring-literal-comparison @gol
+-Wstring-literal-comparison -Warray-bounds @gol
 -Wswitch  -Wswitch-default  -Wswitch-enum @gol
 -Wsystem-headers  -Wtrigraphs  -Wundef  -Wuninitialized @gol
 -Wunknown-pragmas  -Wno-pragmas -Wunreachable-code @gol
@@ -2829,6 +2828,11 @@ compiler is using for optimization.  Thi
 @option{-Wstrict-aliasing}, but it will also give a warning for some
ambiguous
 cases that are safe.

+@item -Warray-bounds
+@opindex Warray-bounds
+This option is only active when @option{-O1} or higher is active. It warns
+about constant subscripts in array accesses that are out of bounds.
+
 @item -Wall
 @opindex Wall
 All of the above @samp{-W} options combined.  This enables all the

--- common.opt  (revision 119391)
+++ common.opt  (working copy)
@@ -61,6 +61,10 @@ Walways-true
 Common Var(warn_always_true)
 Warn about comparisons that always evaluate to true

+Warray-bounds
+Common Var(warn_array_bounds)
+Warn if an array is accessed out of bounds
+
 Wattributes
 Common Var(warn_attributes) Init(1)
 Warn about inappropriate attribute usage
Index: c-opts.c
===================================================================
--- c-opts.c    (revision 119391)
+++ c-opts.c    (working copy)
@@ -396,6 +396,7 @@ c_common_handle_option (size_t scode, co
       warn_strict_aliasing = value;
       warn_string_literal_comparison = value;
       warn_always_true = value;
+      warn_array_bounds = value;

       /* Only warn about unknown pragmas that are not in system
         headers.  */
Index: tree-vrp.c
===================================================================
--- tree-vrp.c  (revision 119391)
+++ tree-vrp.c  (working copy)
@@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA.  */
 #include "tree-dump.h"
 #include "timevar.h"
 #include "diagnostic.h"
+#include "toplev.h"
 #include "cfgloop.h"
 #include "tree-scalar-evolution.h"
 #include "tree-ssa-propagate.h"
@@ -658,7 +659,6 @@ value_ranges_intersect_p (value_range_t
          || value_inside_range (vr0->max, vr1) == 1);
 }

-
 /* Return true if VR includes the value zero, false otherwise.  FIXME,
    currently this will return false for an anti-range like ~[-4, 3].
    This will be wrong when the semantics of value_inside_range are
@@ -3372,6 +3372,129 @@ insert_range_assertions (void)
   BITMAP_FREE (need_assert_for);
 }

+/*  Checks one ARRAY_REF. Ignores arrays that are flexible or
+    likely to be a struct hack. If VRP can determine that the
+    array subscript is a contant, check if it is outside valid
+    range. If the array subscript is a RANGE, warn if it is
+    non-overlapping with valid range.  */

You need two spaces after '.' but not after /*, also the parameters need documentation.

+static void
+check_array_ref (tree ref, location_t* locus, bool ignore_off_by_one)
+{
+  value_range_t* vr = NULL;
+  int rl, ru;
+  tree ls = TREE_OPERAND (ref, 1);
+  tree us = TREE_OPERAND (ref, 1);
+  tree lb = array_ref_low_bound(ref);
+  tree ub = array_ref_up_bound(ref);
+
+  if (!ub || !locus || TREE_NO_WARNING (ref)
+      || TREE_CODE (ub) != INTEGER_CST
+      /* Can not check flexible arrays.  */
+      || (TYPE_SIZE (TREE_TYPE (ref)) == NULL_TREE
+          && TYPE_DOMAIN (TREE_TYPE (ref)) != NULL_TREE
+          && TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (ref))) == NULL_TREE)
+      /* Accesses after the end of arrays of size 0 (gcc
+         extension) and 1 are likely intentional ("struct
+         hack").  */
+      || compare_tree_int (ub, 1) <= 0)
+    return;

The "struct hack" needs to include all arrays that are the last member in a structure. See tree-dfa.c:get_ref_base_and_extent - you can feed it the array_ref and check if *pmax_size is -1 (unbound or unknown). This function should also deal with flexible arrays (i.e. report them as unknown extent).

+
+  ub = fold_build2 (PLUS_EXPR, TREE_TYPE (ub), ub, integer_one_node);

You should use int_const_binop here.


+  if (TREE_CODE (ls) == SSA_NAME)
+    {
+      vr = get_value_range (ls);
+      if (vr->type == VR_RANGE || vr->type == VR_ANTI_RANGE)
+        {
+          ls = vr->type == VR_RANGE ? vr->max : vr->min;
+          us = vr->type == VR_RANGE ? vr->min : vr->max;
+        }
+    }
+
+  ru = TREE_CODE (us) == INTEGER_CST ? tree_int_cst_compare (ub, us) : -2;
+  rl = TREE_CODE (ls) == INTEGER_CST ? tree_int_cst_compare (ls, lb) : -2;

It looks like you can avoid adding 1 to ub by adjusting the checks below to ru <= 0.

+  TREE_NO_WARNING (ref ) = 1;
+
+  if (vr && vr->type == VR_ANTI_RANGE)
+    {
+      if (ru == -1 && rl == -1)
+        warning (OPT_Warray_bounds,
+                 "%Harray subscript is outside array bounds", locus);
+      else
+        TREE_NO_WARNING (ref) = 0;
+    }
+  else if (ru == -1 || (!ignore_off_by_one && ru == 0))
+    warning (OPT_Warray_bounds, "%Harray subscript is above array bounds",
+             locus);
+  else if (rl == -1)
+    warning (OPT_Warray_bounds, "%Harray subscript is below array bounds",
+             locus);
+  else
+    TREE_NO_WARNING (ref) = 0;
+}
+
+static bool array_bounds_already_done;
+
+/* Checks if this is an ARRAY_REF inside an ADDR_EXPR (in which an array
+   subscript one outside the valid range is allowed) or not. Call
+   check_array_ref for each ARRAY_REF.  */

Parameters need documenting.


+static tree
+check_array_bounds (tree *tp, int *walk_subtree, void *data)
+{
+ tree t = *tp;
+
+ *walk_subtree = TRUE;
+
+  if (TREE_CODE (t) == ARRAY_REF)
+    check_array_ref (t, (location_t*) data, false /*ignore_off_by_one*/);
+  else if (TREE_CODE (t) == ADDR_EXPR)
+    {
+       t = TREE_OPERAND (t, 0);
+       while (!array_bounds_already_done && handled_component_p (t))
+         {
+           if (TREE_CODE (t) == ARRAY_REF)
+             check_array_ref (t, (location_t*) data,
true /*ignore_off_by_one*/);
+           t = TREE_OPERAND (t, 0);
+         }
+       *walk_subtree = FALSE;
+    }
+
+  return NULL_TREE;
+}
+
+/* Walk over all statements of all reachable BBs and call check_array_bounds
+   on them.  */
+
+static void
+check_all_array_refs (void)
+{
+  basic_block bb;
+  block_stmt_iterator si;
+
+  FOR_EACH_BB (bb)
+    {
+      /* Skip bb's that are clearly unreachable.  */
+      if (single_pred_p (bb))
+      {
+       basic_block pred_bb = EDGE_PRED (bb, 0)->src;
+       tree ls = NULL_TREE;
+
+       if (!bsi_end_p (bsi_last (pred_bb)))
+         ls = bsi_stmt (bsi_last (pred_bb));
+
+       if (ls && TREE_CODE (ls) == COND_EXPR
+           && ((COND_EXPR_COND (ls) == boolean_false_node
+                && (EDGE_PRED (bb, 0)->flags & EDGE_TRUE_VALUE))
+               || (COND_EXPR_COND (ls) == boolean_true_node
+                   && (EDGE_PRED (bb, 0)->flags & EDGE_FALSE_VALUE))))
+         continue;
+      }
+      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+       walk_tree_without_duplicates (bsi_stmt_ptr (si), check_array_bounds,
+                                     EXPR_LOCUS (*bsi_stmt_ptr (si)));
+    }
+}

 /* Convert range assertion expressions into the implied copies and
    copy propagate away the copies.  Doing the trivial copy propagation
@@ -3577,7 +3700,7 @@ vrp_visit_assignment (tree stmt, tree *o

       return SSA_PROP_NOT_INTERESTING;
     }
-
+
   /* Every other statement produces no useful ranges.  */
   FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
     set_value_range_to_varying (get_value_range (def));
@@ -4684,6 +4807,12 @@ vrp_finalize (void)

substitute_and_fold (single_val_range, true);

+  if (warn_array_bounds)
+      check_all_array_refs();
+
+  /* workaround for PR/26726.  */
+  array_bounds_already_done = true;

This comment should be more elaborate.


   /* We must identify jump threading opportunities before we release
      the datastructures built by VRP.  */
   identify_jump_threads ();
Index: testsuite/gcc.dg/Warray-bounds.c
===================================================================
--- testsuite/gcc.dg/Warray-bounds.c    (revision 0)
+++ testsuite/gcc.dg/Warray-bounds.c    (revision 0)
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Warray-bounds" } */
+
+int a[10];
+
+static inline int n(void) {
+    __SIZE_TYPE__ strlen(const char *s);
+    return strlen("12345");
+}
+
+void g(int *p);
+void h(int p);
+
+int* f(void) {
+    int b[10];
+    int i;
+    struct {
+       int c[10];
+    } c;
+
+    a[-1] = 0;             /* { dg-warning "array subscript" } */
+    a[ 0] = 0;
+    a[ 1] = 0;
+
+
+    a[ 9] = 0;
+    a[10] = 0;             /* { dg-warning "array subscript" } */
+    a[11] = 0;             /* { dg-warning "array subscript" } */
+    a[2 * n() - 11] = 0;    /* { dg-warning "array subscript" } */
+    a[2 * n() - 10] = 0;
+    a[2 * n() -  1] = 0;
+    a[2 * n() -  0] = 0;    /* { dg-warning "array subscript" } */
+
+    b[-1] = 0;             /* { dg-warning "array subscript" } */
+    b[ 0] = 0;
+    b[ 1] = 0;
+    b[ 9] = 0;
+    b[10] = 0;             /* { dg-warning "array subscript" } */
+    b[11] = 0;             /* { dg-warning "array subscript" } */
+    b[2 * n() - 11] = 0;    /* { dg-warning "array subscript" } */
+    b[2 * n() - 10] = 0;
+    b[2 * n() -  1] = 0;
+    b[2 * n() -  0] = 0;    /* { dg-warning "array subscript" } */
+
+    c.c[-1] = 0;           /* { dg-warning "array subscript" } */
+    c.c[ 0] = 0;
+    c.c[ 1] = 0;
+    c.c[ 9] = 0;
+    c.c[10] = 0;           /* { dg-warning "array subscript" } */
+    c.c[11] = 0;           /* { dg-warning "array subscript" } */
+    c.c[2 * n() - 11] = 0;  /* { dg-warning "array subscript" } */
+    c.c[2 * n() - 10] = 0;
+    c.c[2 * n() -  1] = 0;
+    c.c[2 * n() -  0] = 0;  /* { dg-warning "array subscript" } */
+
+    g(&a[8]);
+    g(&a[9]);
+    g(&a[10]);
+    g(&a[11]);             /* { dg-warning "array subscript" } */
+
+    g(&b[10]);
+    g(&c.c[10]);
+    g(&a[11]);             /* { dg-warning "array subscript" } */
+    g(&b[11]);             /* { dg-warning "array subscript" } */
+    g(&c.c[11]);           /* { dg-warning "array subscript" } */
+
+    g(&a[0]);
+    g(&b[0]);
+    g(&c.c[0]);
+
+    g(&a[-1]);             /* { dg-warning "array subscript" } */
+    g(&b[-1]);             /* { dg-warning "array subscript" } */
+    h(sizeof a[-1]);
+    h(sizeof a[10]);
+    h(sizeof b[-1]);
+    h(sizeof b[10]);
+    h(sizeof c.c[-1]);
+    h(sizeof c.c[10]);
+
+    if (10 < 10)
+       a[10] = 0;
+    if (10 < 10)
+       b[10] = 0;
+    if (-1 >= 0)
+       c.c[-1] = 0;
+
+    for (i = 20; i < 30; ++i)
+             a[i] = 1;       /* { dg-warning "array subscript" } */
+
+    return a;
+}
+

Otherwise this looks reasonable. Please incorporate the documentation changes suggested by Andrew. I think C family languages are ok for now, others can be added as a followup with approrpiate testcases.

Thanks,
Richard.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]