This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 1/2] PR c/68757: fix uninitialied src_range for various builtins
- From: David Malcolm <dmalcolm at redhat dot com>
- To: Bernd Schmidt <bschmidt at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 08 Dec 2015 11:02:16 -0500
- Subject: Re: [PATCH 1/2] PR c/68757: fix uninitialied src_range for various builtins
- Authentication-results: sourceware.org; auth=none
- References: <1449589428-10681-1-git-send-email-dmalcolm at redhat dot com> <5666F95E dot 2080908 at redhat dot com>
On Tue, 2015-12-08 at 16:38 +0100, Bernd Schmidt wrote:
> On 12/08/2015 04:43 PM, David Malcolm wrote:
> > This fixes various uninitialized src_range of c_expr, this time
> > in the various builtins that are parsed via c_parser_get_builtin_args.
> >
> > Bootstrapped®rtested on x86_64-pc-linux-gnu.
> >
> > OK for trunk?
>
> I think both of these patches are OK. Some questions though.
>
> > " a constant");
> > constant_expression_warning (c);
> > expr = integer_zerop (c) ? *e3_p : *e2_p;
> > + set_c_expr_source_range (&expr, loc, close_paren_loc);
>
> If that had an uninitialized range, it implies that the *eN_p
> expressions also have uninitialized parts. Correct? If so, I think we
> should fix that.
Hmmm... re-reading the patch, I think my description was sloppy; sorry.
I believe uninitialized data affected RID_BUILTIN_COMPLEX and
RID_BUILTIN_SHUFFLE, whereas RID_CHOOSE_EXPR and
RID_BUILTIN_CALL_WITH_STATIC_CHAIN copied src_range data from
subexpressions.
The fix for the uninitialized data made me think "what should the source
range of RID_BUILTIN_COMPLEX be?" Hence I noticed that each of the
places where c_parser_get_builtin_args was used didn't have an explicit
choice about src_range, and that it ought to use the builtin args in its
range, and so I updated these.
Within RID_CHOOSE_EXPR and RID_BUILTIN_CALL_WITH_STATIC_CHAIN, *eN_P
come from c_parser_get_builtin_args, and reviewing that function, it
purely gets them by copying them from the return value of
c_parser_expr_no_commas, so there's no uninitialized data introduced
there. [There could be an argument that for RID_CHOOSE_EXPR we should
use the range of the chosen expression, which was the status quo for
that case; the patch makes it use the range of the full
__builtin_choose_expr () for consistency with other the builtins].
(I believe the proposed ChangeLog does accurately reflect the change;
just the title may need tweaking).
> Also, what happened to the idea of a constructor for c_expr_t?
I actually implemented something like this when implementing these two
patches.
Work-in-progress patch attached, which introduces an INVALID_LOCATION
value for source_location/location_t, and uses it to "poison" the
initial value of c_expr's src_range, with lots of assertions to verify
that it's been overwritten by time we use it.
It doesn't fully work yet, but much of gcc.dg survives with this (and
it's what I used to detect the alignof issue in patch 2). Does this
look like something I should pursue?
>From cf8aa843a7bb5f173eb8107b1c721e964b9e0677 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Mon, 7 Dec 2015 13:47:03 -0500
Subject: [PATCH] WIP: add INVALID_LOCATION and use it to poison src_range
---
gcc/c/c-parser.c | 8 +++++++-
gcc/c/c-tree.h | 26 ++++++++++++++++++++++++++
gcc/c/c-typeck.c | 2 ++
gcc/tree.c | 6 ++++++
libcpp/include/line-map.h | 10 +++++++++-
libcpp/line-map.c | 7 +++++++
6 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 4611e5b..cded9fe 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -63,6 +63,8 @@ void
set_c_expr_source_range (c_expr *expr,
location_t start, location_t finish)
{
+ gcc_assert (start != INVALID_LOCATION);
+ gcc_assert (finish != INVALID_LOCATION);
expr->src_range.m_start = start;
expr->src_range.m_finish = finish;
if (expr->value)
@@ -73,6 +75,8 @@ void
set_c_expr_source_range (c_expr *expr,
source_range src_range)
{
+ gcc_assert (src_range.m_start != INVALID_LOCATION);
+ gcc_assert (src_range.m_finish != INVALID_LOCATION);
expr->src_range = src_range;
if (expr->value)
set_source_range (expr->value, src_range);
@@ -6145,6 +6149,8 @@ c_parser_expr_no_commas (c_parser *parser, struct c_expr *after,
c_parser_consume_token (parser);
exp_location = c_parser_peek_token (parser)->location;
rhs = c_parser_expr_no_commas (parser, NULL);
+ gcc_assert (rhs.src_range.m_start != INVALID_LOCATION);
+ gcc_assert (rhs.src_range.m_finish != INVALID_LOCATION);
rhs = convert_lvalue_to_rvalue (exp_location, rhs, true, true);
ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type,
@@ -6917,7 +6923,6 @@ c_parser_alignof_expression (c_parser *parser)
}
else
{
- struct c_expr ret;
expr = c_parser_unary_expression (parser);
end_loc = expr.src_range.m_finish;
alignof_expr:
@@ -6927,6 +6932,7 @@ c_parser_alignof_expression (c_parser *parser)
pedwarn (start_loc,
OPT_Wpedantic, "ISO C does not allow %<%E (expression)%>",
alignof_spelling);
+ struct c_expr ret;
ret.value = c_alignof_expr (start_loc, expr.value);
ret.original_code = ERROR_MARK;
ret.original_type = NULL;
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 00e72b1..30625ee 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -118,6 +118,32 @@ along with GCC; see the file COPYING3. If not see
for code generation alongside a tree representing its value. */
struct c_expr
{
+ c_expr ()
+ {
+ /* In a checkeed build, poison the initial value of src_range so
+ that attempts to use uninitialized data fail with an assertion,
+ without needing to run under Valgrind. */
+#if CHECKING_P
+ src_range.m_start = INVALID_LOCATION;
+ src_range.m_finish = INVALID_LOCATION;
+#endif
+ }
+
+ c_expr (const c_expr &other)
+ {
+ value = other.value;
+ original_code = other.original_code;
+ original_type = other.original_type;
+#if CHECKING_P
+ if (value != error_mark_node)
+ {
+ gcc_assert (other.src_range.m_start != INVALID_LOCATION);
+ gcc_assert (other.src_range.m_finish != INVALID_LOCATION);
+ }
+#endif
+ src_range = other.src_range;
+ }
+
/* The value of the expression. */
tree value;
/* Record the original unary/binary operator of an expression, which may
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index b691072..97064ae 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -6821,6 +6821,8 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype,
expr.value = inside_init;
expr.original_code = (strict_string ? STRING_CST : ERROR_MARK);
expr.original_type = NULL;
+ expr.src_range.m_start = init_loc;
+ expr.src_range.m_finish = init_loc;
maybe_warn_string_init (init_loc, type, expr);
if (TYPE_DOMAIN (type) && !TYPE_MAX_VALUE (TYPE_DOMAIN (type)))
diff --git a/gcc/tree.c b/gcc/tree.c
index c8b3ab8..db429c7 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -13938,6 +13938,9 @@ set_block (location_t loc, tree block)
location_t
set_source_range (tree expr, location_t start, location_t finish)
{
+ gcc_assert (start != INVALID_LOCATION);
+ gcc_assert (finish != INVALID_LOCATION);
+
source_range src_range;
src_range.m_start = start;
src_range.m_finish = finish;
@@ -13950,6 +13953,9 @@ set_source_range (tree expr, source_range src_range)
if (!EXPR_P (expr))
return UNKNOWN_LOCATION;
+ gcc_assert (src_range.m_start != INVALID_LOCATION);
+ gcc_assert (src_range.m_finish != INVALID_LOCATION);
+
location_t pure_loc = get_pure_location (EXPR_LOCATION (expr));
location_t adhoc = COMBINE_LOCATION_DATA (line_table,
pure_loc,
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index 73c583e..e3cc0d4 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -160,7 +160,7 @@ typedef unsigned int linenum_type;
-----------+-------------------------------+-------------------------------
0x80000000 | Start of ad-hoc values; the lower 31 bits are used as an index
... | into the line_table->location_adhoc_data_map.data array.
- 0xffffffff | UINT_MAX |
+ 0xffffffff | UINT_MAX == INVALID_LOCATION |
-----------+-------------------------------+-------------------------------
Examples of location encoding.
@@ -260,6 +260,10 @@ typedef unsigned int linenum_type;
worked example in libcpp/location-example.txt. */
typedef unsigned int source_location;
+/* A bogus value, used for detecting uninitialized data in checked
+ builds. */
+#define INVALID_LOCATION ((source_location) UINT_MAX)
+
/* A range of source locations.
Ranges are closed:
@@ -1004,6 +1008,10 @@ COMBINE_LOCATION_DATA (struct line_maps *set,
source_range src_range,
void *block)
{
+ linemap_assert (loc != INVALID_LOCATION);
+ linemap_assert (src_range.m_start != INVALID_LOCATION);
+ linemap_assert (src_range.m_finish != INVALID_LOCATION);
+
return get_combined_adhoc_loc (set, loc, src_range, block);
}
diff --git a/libcpp/line-map.c b/libcpp/line-map.c
index 333e835..76c1ec8 100644
--- a/libcpp/line-map.c
+++ b/libcpp/line-map.c
@@ -84,6 +84,10 @@ location_adhoc_data_eq (const void *l1, const void *l2)
(const struct location_adhoc_data *) l1;
const struct location_adhoc_data *lb2 =
(const struct location_adhoc_data *) l2;
+ linemap_assert (lb1->src_range.m_start != INVALID_LOCATION);
+ linemap_assert (lb1->src_range.m_finish != INVALID_LOCATION);
+ linemap_assert (lb2->src_range.m_start != INVALID_LOCATION);
+ linemap_assert (lb2->src_range.m_finish != INVALID_LOCATION);
return (lb1->locus == lb2->locus
&& lb1->src_range.m_start == lb2->src_range.m_start
&& lb1->src_range.m_finish == lb2->src_range.m_finish
@@ -166,6 +170,9 @@ get_combined_adhoc_loc (struct line_maps *set,
struct location_adhoc_data lb;
struct location_adhoc_data **slot;
+ linemap_assert (src_range.m_start != INVALID_LOCATION);
+ linemap_assert (src_range.m_finish != INVALID_LOCATION);
+
if (IS_ADHOC_LOC (locus))
locus
= set->location_adhoc_data_map.data[locus & MAX_SOURCE_LOCATION].locus;
--
1.8.5.3