This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
getting operands of call arguments and return values
- From: Alexander Sotirov <alex at sotirov dot net>
- To: gcc at gcc dot gnu dot org
- Date: Tue, 25 Sep 2007 01:57:49 -0700
- Subject: getting operands of call arguments and return values
I'm working on a vulnerability detection pass for GCC, using a combination of
taint and value range propagation. The first version of the code (and a paper
describing the approach) is available at http://gcc.vulncheck.org/
The tree-ssa infrastructure was very useful for implementing this type of
analysis. There was only one place where I had to extend GCC, and I wanted to
share it with the list.
If we have a statement "b = foo(a, buf)" and we dump its operands, we'll get
the following:
Visiting stmt: b_2 = foo (a_1, &buf)
DEF: b_2
USE: a_1
VUSE:
VMUSTDEF:
VMAYDEF: buf_6 = buf_4 NONLOCAL.5_7 = NONLOCAL.5_5
The taint propagation algorithm needs to mark the return value of the function
as tainted, but how can it tell which defs are associated with the return
value? In the example above we need to taint b_2, but not buf_6.
To solve this, I implemented two functions:
tree get_lhs_operands (tree stmt);
tree get_call_arg_operands (tree stmt, tree call, int argnum);
The first function takes a MODIFY_EXPR and returns an artificial statement that
contains only the operands of the LHS of the MODIFY_EXPR. In the example above
it would return:
DEF: b_2
USE:
VUSE:
VMUSTDEF:
VMAYDEF:
The second function takes a statement, a CALL_EXPR and an argument number. It
returns an artificial statement that contains only the operands of the
specified function argument. Let's dump the operands of the first two arguments
from the example above:
arg1:
DEF:
USE: a_1
VUSE:
VMUSTDEF:
VMAYDEF:
arg2:
DEF:
USE:
VUSE:
VMUSTDEF:
VMAYDEF: buf_6 = buf_4
The two functions are implemented in the patch below. The code is kinda hacky
and I am sure that there is a better way to do this, but I was trying to keep
the patch as small and self-contained as possible.
Doing it right requires deeper understanding of the operand cache
implementation than I have. I'm not trying to get this into GCC, but perhaps
somebody will find this patch useful.
Take care,
Alexander Sotirov
diff -ruN gcc-4.2.1.orig/gcc/tree-ssa-operands.c gcc-4.2.1/gcc/tree-ssa-operands.c
--- gcc-4.2.1.orig/gcc/tree-ssa-operands.c 2007-06-15 15:10:09.000000000 -0700
+++ gcc-4.2.1/gcc/tree-ssa-operands.c 2007-09-16 20:03:36.000000000 -0700
@@ -130,6 +130,14 @@
static maydef_optype_p free_maydefs = NULL;
static mustdef_optype_p free_mustdefs = NULL;
+/* Temoporarily set by get_lhs_operands to tell get_modify_expr_operands
+ to process only the LHS operands. */
+static int operands_lhs_only = 0;
+
+/* Temporarily set by get_call_arg_operands to tell get_call_expr_operands
+ to process only the operands for a specific argument. */
+static int operands_call_arg = 0;
+
/* Allocates operand OP of given TYPE from the appropriate free list,
or of the new value if the list is empty. */
@@ -1680,6 +1688,46 @@
tree op;
int call_flags = call_expr_flags (expr);
+ /* If operands_call_arg is set, add only the operands for the
+ specified argument of the function. */
+ if (operands_call_arg)
+ {
+ int i;
+ stmt_ann_t s_ann = stmt_ann (stmt);
+
+ /* Find the specified operand. */
+ op = TREE_OPERAND (expr, 1);
+ for (i = 1; op && (i < operands_call_arg); i++)
+ op = TREE_CHAIN (op);
+
+ if (op != NULL)
+ {
+ get_expr_operands (stmt, &TREE_VALUE (op), opf_none);
+
+ /* Create virtual defs and uses for variables that have their
+ addresses taken in the operand. Based on code in the
+ update_alias_info function in tree-ssa-structalias.c */
+ bitmap addr_taken = addresses_taken (expr);
+ if (addr_taken)
+ {
+ bitmap_iterator bi;
+ unsigned i;
+
+ EXECUTE_IF_SET_IN_BITMAP (addr_taken, 0, i, bi)
+ {
+ tree var = referenced_var (i);
+
+ if (unmodifiable_var_p (var))
+ add_stmt_operand (&var, s_ann, opf_none);
+ else
+ add_stmt_operand (&var, s_ann, opf_is_def);
+ }
+ }
+ }
+
+ return;
+ }
+
/* If aliases have been computed already, add V_MAY_DEF or V_USE
operands for all the symbols that have been found to be
call-clobbered.
@@ -1816,7 +1864,8 @@
get_modify_expr_operands (tree stmt, tree expr)
{
/* First get operands from the RHS. */
- get_expr_operands (stmt, &TREE_OPERAND (expr, 1), opf_none);
+ if (!operands_lhs_only)
+ get_expr_operands (stmt, &TREE_OPERAND (expr, 1), opf_none);
/* For the LHS, use a regular definition (OPF_IS_DEF) for GIMPLE
registers. If the LHS is a store to memory, we will either need
@@ -2585,4 +2634,72 @@
dump_immediate_uses_for (stderr, var);
}
+
+/* Given a MODIFY_EXPR, returns an artificial statement containing only
+ the operands from the left-hand side of the assignment. */
+
+tree
+get_lhs_operands (tree stmt)
+{
+ tree new;
+ ssa_op_iter iter;
+ use_operand_p use_p;
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return NULL;
+
+ /* Create a copy of the MODIFY_EXPR with en empty operand cache. */
+ new = build2 (MODIFY_EXPR, TREE_TYPE (stmt), TREE_OPERAND (stmt, 0),
+ TREE_OPERAND (stmt, 1));
+
+ /* Copy virtual operands from the old statement. They will be
+ replaced when we get the new operands, but we need the old
+ ones to get the correct SSA names. */
+ update_stmt_if_modified (stmt);
+ copy_virtual_operands (new, stmt);
+
+ operands_lhs_only = 1;
+ update_stmt_operands (new);
+ operands_lhs_only = 0;
+
+ /* All uses in this fake stmt must not be in the immediate use lists. */
+ delink_stmt_imm_use (new);
+
+ return new;
+}
+
+
+/* Given a CALL_EXPR and an argument number, returns an artificial
+ statement containing only the operands from the specified argument. */
+
+tree
+get_call_arg_operands (tree stmt, tree call, int argnum)
+{
+ tree new;
+ ssa_op_iter iter;
+ use_operand_p use_p;
+
+ if (TREE_CODE (call) != CALL_EXPR)
+ return NULL;
+
+ /* Create a copy of the CALL_EXPR with en empty operand cache. */
+ new = build3 (CALL_EXPR, TREE_TYPE (call), TREE_OPERAND (call, 0),
+ TREE_OPERAND (call, 1), TREE_OPERAND (call, 2));
+
+ /* Copy virtual operands from the old statement. They will be
+ replaced when we get the new operands, but we need the old
+ ones to get the correct SSA names. */
+ update_stmt_if_modified (stmt);
+ copy_virtual_operands (new, stmt);
+
+ operands_call_arg = argnum;
+ update_stmt_operands (new);
+ operands_call_arg = 0;
+
+ /* All uses in this fake stmt must not be in the immediate use lists. */
+ delink_stmt_imm_use (new);
+
+ return new;
+}
+
#include "gt-tree-ssa-operands.h"
diff -ruN gcc-4.2.1.orig/gcc/tree-ssa-operands.h gcc-4.2.1/gcc/tree-ssa-operands.h
--- gcc-4.2.1.orig/gcc/tree-ssa-operands.h 2006-01-18 17:42:48.000000000 -0800
+++ gcc-4.2.1/gcc/tree-ssa-operands.h 2007-09-16 16:05:30.000000000 -0700
@@ -169,6 +169,9 @@
extern void add_to_addressable_set (tree, bitmap *);
+extern tree get_lhs_operands (tree stmt);
+extern tree get_call_arg_operands (tree stmt, tree call, int argnum);
+
enum ssa_op_iter_type {
ssa_op_iter_none = 0,
ssa_op_iter_tree,