This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Add support to trace comparison instructions and switch statements
- From: Jakub Jelinek <jakub at redhat dot com>
- To: 吴潍浠(此彼) <weixi dot wwx at antfin dot com>
- Cc: gcc <gcc at gcc dot gnu dot org>, gcc-patches <gcc-patches at gcc dot gnu dot org>, Jeff Law <law at redhat dot com>, wishwu007 <wishwu007 at gmail dot com>
- Date: Fri, 1 Sep 2017 18:23:18 +0200
- Subject: Re: Add support to trace comparison instructions and switch statements
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jakub at redhat dot com
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com EB5657E439
- References: <234840fd-a06a-4dfd-a1c5-254e26144754.weixi.wwx@antfin.com> <887030ab-6bae-4dd5-b347-05bfad034603.weixi.wwx@antfin.com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Fri, Jul 21, 2017 at 01:38:17PM +0800, 吴潍浠(此彼) wrote:
> Hi Jeff
>
> I have signed the copyright assignment, and used the name 'Wish Wu' .
> Should I send you a copy of my assignment ?
>
> The attachment is my new patch with small changes.
> Codes are checked by ./contrib/check_GNU_style.sh, except some special files.
Please provide a ChangeLog entry, you can use ./contrib/mklog as a start.
@@ -975,6 +974,10 @@ fsanitize=
Common Driver Report Joined
Select what to sanitize.
+fsanitize-coverage=
+Common Driver Report Joined
+Select what to coverage sanitize.
+
Why Driver? The reason fsanitize= needs it is that say for
-fsanitize=address we add libraries in the driver, etc., but that
isn't the case for the coverage, right?
--- gcc/flag-types.h (revision 250199)
+++ gcc/flag-types.h (working copy)
@@ -250,6 +250,14 @@ enum sanitize_code {
| SANITIZE_BOUNDS_STRICT
};
+/* Different trace modes. */
+enum sanitize_coverage_code {
+ /* Trace PC. */
+ SANITIZE_COV_TRACE_PC = 1UL << 0,
+ /* Trace Compare. */
+ SANITIZE_COV_TRACE_CMP = 1UL << 1
+};
No need for UL suffixes, the reason sanitize_code uses them is
that it includes 1 << 16 and above and might be included even in target code
(for host we require 32-bit integers, for target code it might be just
16-bit).
--- gcc/opts.c (revision 250199)
+++ gcc/opts.c (working copy)
@@ -1519,6 +1519,17 @@ const struct sanitizer_opts_s sanitizer_opts[] =
{ NULL, 0U, 0UL, false }
};
+/* -f{,no-}sanitize-coverage= suboptions. */
+const struct sanitizer_opts_s coverage_sanitizer_opts[] =
+{
+#define SANITIZER_OPT(name, flags, recover) \
+ { #name, flags, sizeof #name - 1, recover }
+ SANITIZER_OPT (trace-pc, SANITIZE_COV_TRACE_PC, false),
+ SANITIZER_OPT (trace-cmp, SANITIZE_COV_TRACE_CMP, false),
+#undef SANITIZER_OPT
+ { NULL, 0U, 0UL, false }
No need to have the recover argument for the macro, just add false to it
(unless you want to use a different struct type that wouldn't even include
that member).
+/* Given ARG, an unrecognized coverage sanitizer option, return the best
+ matching coverage sanitizer option, or NULL if there isn't one. */
+
+static const char *
+get_closest_coverage_sanitizer_option (const string_fragment &arg)
+{
+ best_match <const string_fragment &, const char*> bm (arg);
+ for (int i = 0; coverage_sanitizer_opts[i].name != NULL; ++i)
+ {
+ bm.consider (coverage_sanitizer_opts[i].name);
+ }
Body which contains just one line shouldn't be wrapped in {}s, just use
for (int i = 0; coverage_sanitizer_opts[i].name != NULL; ++i)
bm.consider (coverage_sanitizer_opts[i].name);
+unsigned int
+parse_coverage_sanitizer_options (const char *p, location_t loc,
+ unsigned int flags, int value, bool complain)
Wrong formatting, unsigned int should go below const char *, like:
parse_coverage_sanitizer_options (const char *p, location_t loc,
unsigned int flags, int value, bool complain)
+{
+ while (*p != 0)
+ {
+ size_t len, i;
+ bool found = false;
+ const char *comma = strchr (p, ',');
+
+ if (comma == NULL)
+ len = strlen (p);
+ else
+ len = comma - p;
+ if (len == 0)
+ {
+ p = comma + 1;
+ continue;
+ }
+
+ /* Check to see if the string matches an option class name. */
+ for (i = 0; coverage_sanitizer_opts[i].name != NULL; ++i)
+ if (len == coverage_sanitizer_opts[i].len
+ && memcmp (p, coverage_sanitizer_opts[i].name, len) == 0)
+ {
+ if (value)
+ flags |= coverage_sanitizer_opts[i].flag;
+ else
+ flags &= ~coverage_sanitizer_opts[i].flag;
+ found = true;
+ break;
+ }
+
+ if (! found && complain)
+ {
+ const char *hint
+ = get_closest_coverage_sanitizer_option (string_fragment (p, len));
+
+ if (hint)
+ error_at (loc,
+ "unrecognized argument to "
+ "-f%ssanitize-coverage= option: %q.*s;"
+ " did you mean %qs?",
+ value ? "" : "no-",
+ (int) len, p, hint);
+ else
+ error_at (loc,
+ "unrecognized argument to "
+ "-f%ssanitize-coverage= option: %q.*s",
+ value ? "" : "no-",
+ (int) len, p);
+ }
+
+ if (comma == NULL)
+ break;
+ p = comma + 1;
+ }
+ return flags;
+}
Though, looking at the above, it sounds like there is just way too much
duplication. So, maybe better just use the parse_sanitizer_options
and get_closest_coverage_option functions for all of
-f{,no-}sanitize{,-recover,-coverage}= , add
const struct sanitizer_opts_s *opts = sanitizer_opts;
if (code == OPT_fsanitize_coverage_)
opts = coverage_sanitizer_opts;
early in both functions, deal with the diagnostics (to print "-coverage"
when needed) and look if there is anything else that needs to be
conditionalized.
@@ -1943,6 +2032,12 @@ common_handle_option (struct gcc_options *opts,
&= ~(SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT);
break;
+ case OPT_fsanitize_coverage_:
+ opts->x_flag_sanitize_coverage
+ = parse_coverage_sanitizer_options (arg, loc,
+ opts->x_flag_sanitize_coverage, value, true);
Wrong formatting, opts should go below arg. But if you use
process_sanitizer_options as mentioned above, it will be a non-issue.
+ break;
+
case OPT_O:
case OPT_Os:
case OPT_Ofast:
Index: gcc/sancov.c
===================================================================
--- gcc/sancov.c (revision 250199)
+++ gcc/sancov.c (working copy)
@@ -1,6 +1,7 @@
/* Code coverage instrumentation for fuzzing.
Copyright (C) 2015-2017 Free Software Foundation, Inc.
- Contributed by Dmitry Vyukov <dvyukov@google.com>
+ Contributed by Dmitry Vyukov <dvyukov@google.com> and
+ Wish Wu <wishwu007@gmail.com>
This file is part of GCC.
@@ -29,31 +30,202 @@ along with GCC; see the file COPYING3. If not see
#include "flags.h"
#include "stmt.h"
#include "gimple-iterator.h"
+#include "tree-core.h"
tree-core.h is already included by tree.h, why do you need to include it
explicitly?
#include "tree-cfg.h"
#include "tree-pass.h"
#include "tree-iterator.h"
+#include "fold-const.h"
+#include "stringpool.h"
+#include "output.h"
+#include "cgraph.h"
#include "asan.h"
namespace {
+static void
+instrument_cond (gimple_stmt_iterator *gsi, gimple *stmt)
+{
+ unsigned int bitno;
+ tree lhs = gimple_cond_lhs (stmt);
+ tree rhs = gimple_cond_rhs (stmt);
+
+ bitno = MAX (TYPE_PRECISION (TREE_TYPE (lhs)),
+ TYPE_PRECISION (TREE_TYPE (rhs)));
1) this is C++, so you can mix declarations and code,
in this case there are even just declarations before it,
so you can use unsigned int bitno = ...;
2) better use prec than bitno for the variable name
3) MAX doesn't make sense, a GIMPLE_COND should have
both operands of compatible types
4) TYPE_PRECISION is only meaningful for some types, and could
mean something unrelated for some other types, I think you
want to move it after the check for type kind
+
+ if (TREE_CODE (TREE_TYPE (lhs)) == INTEGER_TYPE)
What about ENUMERAL_TYPEs (and maybe BOOLEAN_TYPEs)?
Shouldn't they be handled like INTEGER_TYPE, i.e. use
if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
?
+ {
+ enum built_in_function fncode;
+ switch (bitno)
+ {
+ case 8:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMP1;
+ break;
+
+ case 16:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMP2;
+ break;
+
+ case 32:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMP4;
+ break;
+
+ case 64:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMP8;
+ break;
+
+ default:
+ return;
+ }
+ tree fndecl = builtin_decl_implicit (fncode);
For signed integers, when all these builtins have unsigned arguments
it looks like pedantically invalid GIMPLE IL, you'd need to
cast the arguments to corresponding unsigned type.
+ gimple *gcall = gimple_build_call (fndecl, 2, lhs, rhs);
+ gimple_set_location (gcall, gimple_location (stmt));
+ gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
+ }
+ else if (TREE_CODE (TREE_TYPE (lhs)) == REAL_TYPE)
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))) ?
+ {
+ enum built_in_function fncode;
+ switch (bitno)
+ {
+ case 32:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMPF;
+ break;
+
+ case 64:
+ fncode = BUILT_IN_SANITIZER_COV_TRACE_CMPD;
+ break;
Not really sure it is a good idea to use a precision of
REAL_TYPE to decide which builtin to use.
I'd think it should be a check whether TYPE_MODE (TREE_TYPE (lhs))
is equal to TYPE_MODE (float_type_node) or TYPE_MODE (double_type_node).
+static void
+instrument_switch (gimple_stmt_iterator *gsi, gimple *stmt, function *fun)
+{
+ gswitch *switch_stmt = as_a<gswitch *> (stmt);
+ tree index = gimple_switch_index (switch_stmt);
+ unsigned bitno = TYPE_PRECISION (TREE_TYPE (index));
Again, please use prec instead of bitno.
Shouldn't you punt if prec > 64?
+ unsigned i, n = gimple_switch_num_labels (switch_stmt), num = 0;
+ for (i = 0; i < n; ++i)
+ {
+ tree label = gimple_switch_label (switch_stmt, i);
+ tree low_case = CASE_LOW (label);
+ if (low_case != NULL_TREE)
+ num++;
Wrong formatting, num++ should be just one tab indented, not tab + 2 spaces.
+ tree high_case = CASE_HIGH (label);
+ if (high_case != NULL_TREE)
+ num++;
Is that really how do you want to handle CASE_HIGH non-NULL? That is for
cases like:
case 'a' ... 'z':
In that case it is actually like 26 cases, not 2.
+ }
+
+ tree case_array_elem_type = build_type_variant (uint64_type_node, 1, 0);
+ tree case_array_type = build_array_type (case_array_elem_type,
+ build_index_type (size_int (num + 2
+ - 1)));
Better wrap like:
tree case_array_type
= build_array_type (...);
+ for (i = 0; i < n; ++i)
+ {
+ tree label = gimple_switch_label (switch_stmt, i);
+
+ tree low_case = CASE_LOW (label);
+ if (low_case != NULL_TREE)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_int_cst (uint64_type_node,
+ TREE_INT_CST_LOW (low_case)));
You don't want build_int_cst, instead use fold_convert (uint64_type_node, low_case);
+ /* Insert callback to every compare statments. */
+ if (flag_sanitize_coverage & SANITIZE_COV_TRACE_CMP)
+ {
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_COND:
+ instrument_cond (&gsi, stmt);
+ break;
+
+ case GIMPLE_SWITCH:
+ instrument_switch (&gsi, stmt, fun);
+ break;
If you are just looking for GIMPLE_COND and GIMPLE_SWITCH, then both of them
always satisfy stmt_ends_bb_p and thus you don't need to walk the whole bb
to find them; just look at the last stmt in the bb.
That said, the question is what you really want to instrument and why.
Consider simple:
void bar (void);
void
foo (int x)
{
if (x == 21 || x == 64 || x == 98 || x == 135)
bar ();
}
testcase, in GIMPLE IL that will be depending on branch cost e.g. on x86_64:
<bb 2> [100.00%] [count: INV]:
_1 = x_8(D) == 21;
_2 = x_8(D) == 64;
_3 = _1 | _2;
if (_3 != 0)
goto <bb 4>; [33.00%] [count: INV]
else
goto <bb 3>; [67.00%] [count: INV]
<bb 3> [67.00%] [count: INV]:
_4 = x_8(D) == 98;
_5 = x_8(D) == 135;
_6 = _4 | _5;
if (_6 != 0)
goto <bb 4>; [50.00%] [count: INV]
else
goto <bb 5>; [50.00%] [count: INV]
<bb 4> [66.50%] [count: INV]:
bar (); [tail call]
<bb 5> [100.00%] [count: INV]:
return;
but e.g. on powerpc64:
<bb 2> [100.00%] [count: INV]:
if (x_2(D) == 21)
goto <bb 6>; [20.24%] [count: INV]
else
goto <bb 3>; [79.76%] [count: INV]
<bb 3> [79.76%] [count: INV]:
if (x_2(D) == 64)
goto <bb 6>; [34.00%] [count: INV]
else
goto <bb 4>; [66.00%] [count: INV]
<bb 4> [52.64%] [count: INV]:
if (x_2(D) == 98)
goto <bb 6>; [34.00%] [count: INV]
else
goto <bb 5>; [66.00%] [count: INV]
<bb 5> [34.74%] [count: INV]:
if (x_2(D) == 135)
goto <bb 6>; [34.00%] [count: INV]
else
goto <bb 7>; [66.00%] [count: INV]
<bb 6> [77.07%] [count: INV]:
bar (); [tail call]
<bb 7> [100.00%] [count: INV]:
return;
So in the powerpc64 case where it is closer to the source you'd instrument
comparisons with the 4 constants, while for x86_64 you wouldn't instrument
anything at all (as you don't instrument GIMPLE_COND with BOOLEAN_TYPE
operands - those have precision 1 and aren't INTEGRAL_TYPE, and those
GIMPLE_CONDs in there are actually artificial anyway and the original
comparisons are GIMPLE_ASSIGNs with EQ_EXPR comparison code).
Do you trace or should you trace the comparison kind, or do you really want
to treat x == 64, x > 64, x < 64, x != 64 etc. the same?
And, should what you trace be tied to basic blocks at whose start you add
the __sanitizer_cov_trace_pc instrumentation for
-fsanitize-coverage=trace-pc or is that completely unrelated?
Perhaps for -fsanitize-coverage= it might be a good idea to force
LOGICAL_OP_NON_SHORT_CIRCUIT/BRANCH_COST or whatever affects GIMPLE
decisions mentioned above so that the IL is closer to what the user wrote.
Jakub