This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)
- From: Martin Sebor <msebor at gmail dot com>
- To: Gcc Patch List <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 26 Apr 2017 15:59:17 -0600
- Subject: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)
- Authentication-results: sourceware.org; auth=none
Testing my solution for bug 77671 (missing -Wformat-overflow
sprintf with "%s") caused a regression in the charset/builtin2.c
test for bug 25120 (builtin *printf handlers are confused by
-fexec-charset). That led me to realize that like -Wformat
itself, the whole gimple-ssa-sprintf pass is oblivious to
potential differences between the source character set on
the host and the execution character set on the target. As
a result, when the host and target sets are different, the
pass misinterprets ordinary format characters as special
(e.g., parts of directives) and vice versa.
The attached patch implements a simple solution to this problem
by introducing a mapping between the two sets.
Martin
PR tree-optimization/80523 - -Wformat-overflow doesn't consider -fexec-charset
gcc/ChangeLog:
PR tree-optimization/80523
* gimple-ssa-sprintf.c (target_to_host_charmap): New global variable.
(init_target_to_host_charmap, target_to_host, target_strtol10): New
functions.
(maybe_warn, format_directive, parse_directive): Use new functions.
(pass_sprintf_length::execute): Call init_target_to_host_charmap.
gcc/testsuite/ChangeLog:
PR tree-optimization/80523
* gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: New test.
Index: gcc/gimple-ssa-sprintf.c
===================================================================
--- gcc/gimple-ssa-sprintf.c (revision 247263)
+++ gcc/gimple-ssa-sprintf.c (working copy)
@@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see
#include "calls.h"
#include "cfgloop.h"
#include "intl.h"
+#include "langhooks.h"
#include "builtins.h"
#include "stor-layout.h"
@@ -273,6 +274,118 @@ target_size_max ()
return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
}
+/* A straightforward mapping from the execution character set to the host
+ character set indexed by execution character. */
+
+static char target_to_host_charmap[256];
+
+/* Initialize a mapping from the execution character set to the host
+ character set. */
+
+static bool
+init_target_to_host_charmap ()
+{
+ if (!init_target_chars ())
+ return false;
+
+ /* The subset of the source character set used by printf conversion
+ specifications (strictly speaking, not all letters are used but
+ they are included here for the sake of simplicity). The dollar
+ sign must be included even though it's not in the basic source
+ character set. */
+ const char srcset[] = " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz0123456789#%'*+-.$";
+
+ /* Set the mapping for all characters to some ordinary value (i,e.,
+ not one used in printf conversion specifications) and overwrite
+ those that are used by conversion specifications with their
+ corresponding values. */
+ memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
+
+ for (const char *pc = srcset; *pc; ++pc)
+ {
+ /* Slice off the high end bits in case target characters are
+ signed. All values are expected to be non-nul, otherwise
+ there's a problem. */
+ if (unsigned char tc = lang_hooks.to_target_charset (*pc))
+ target_to_host_charmap[tc] = *pc;
+ else
+ return false;
+ }
+
+ return true;
+}
+
+/* Return the host source character corresponding to the character
+ CH in the execution character set if one exists, or some innocuous
+ (non-special, non-nul) source character otherwise. */
+
+static inline int
+target_to_host (unsigned char ch)
+{
+ return target_to_host_charmap[ch];
+}
+
+/* Return a string consisting of characters in the host source character
+ set corresponding to the string TARGSTR consisting of characters in
+ the execution character set. */
+
+static const char*
+target_to_host (const char *targstr)
+{
+ if (target_to_host ('%') == '%')
+ return targstr;
+
+ static char hostr[32];
+ for (char *ph = hostr; *targstr; ++targstr)
+ {
+ *ph++ = target_to_host (*targstr);
+ if (!*targstr)
+ break;
+
+ if (ph - hostr == sizeof hostr - 4)
+ {
+ *ph = '\0';
+ strcat (ph, "...");
+ break;
+ }
+ }
+
+ return hostr;
+}
+
+/* Convert the sequence of decimal digits in the execution character
+ starting at S to a long, just like strtol does. Return the result
+ and set *END to one past the last converted character. */
+
+static inline long
+target_strtol10 (const char *s, char** end)
+{
+ /* As an optimization use the host strtol if the digit zero is the same
+ in the host and target character sets. */
+ if ('0' == target_to_host ('0'))
+ return strtol (s, end, 10);
+
+ long val = 0;
+ for ( ; ; )
+ {
+ unsigned char c = target_to_host (*s);
+ if (ISDIGIT (c))
+ {
+ val *= 10;
+ val += c - '0';
+ }
+ else
+ break;
+
+ ++s;
+ }
+
+ *end = const_cast<char*>(s);
+
+ return val;
+}
+
/* Return the constant initial value of DECL if available or DECL
otherwise. Same as the synonymous function in c/c-typeck.c. */
@@ -2289,7 +2402,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
/* The size of the destination region is exact. */
unsigned HOST_WIDE_INT navail = avail_range.max;
- if (*dir.beg != '%')
+ if (target_to_host (*dir.beg) != '%')
{
/* For plain character directives (i.e., the format string itself)
but not others, point the caret at the first character that's
@@ -2340,7 +2453,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size %wu")));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg, res.min,
+ dir.len, target_to_host (dir.beg), res.min,
navail);
}
@@ -2357,7 +2470,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.max, navail);
}
@@ -2377,7 +2490,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.likely, navail);
}
@@ -2394,7 +2507,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"%wu bytes into a region of size %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.min, res.max,
navail);
}
@@ -2410,13 +2523,13 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.min, navail);
}
/* The size of the destination region is a range. */
- if (*dir.beg != '%')
+ if (target_to_host (*dir.beg) != '%')
{
unsigned HOST_WIDE_INT navail = avail_range.max;
@@ -2469,7 +2582,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg, res.min,
+ dir.len, target_to_host (dir.beg), res.min,
avail_range.min, avail_range.max);
}
@@ -2488,7 +2601,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size between %wu and %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg, res.max,
+ dir.len, target_to_host (dir.beg), res.max,
avail_range.min, avail_range.max);
}
@@ -2510,7 +2623,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size between %wu and %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg, res.likely,
+ dir.len, target_to_host (dir.beg), res.likely,
avail_range.min, avail_range.max);
}
@@ -2529,7 +2642,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"%wu bytes into a region of size between %wu and %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.min, res.max,
avail_range.min, avail_range.max);
}
@@ -2547,7 +2660,7 @@ maybe_warn (substring_loc &dirloc, source_range *p
"into a region of size between %wu and %wu"));
return fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dir.len, dir.beg,
+ dir.len, target_to_host (dir.beg),
res.min,
avail_range.min, avail_range.max);
}
@@ -2636,7 +2749,7 @@ format_directive (const pass_sprintf_length::call_
{
fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
"%<%.*s%> directive argument is null",
- dirlen, dir.beg);
+ dirlen, target_to_host (dir.beg));
/* Don't bother processing the rest of the format string. */
res->warned = true;
@@ -2703,7 +2816,7 @@ format_directive (const pass_sprintf_length::call_
info.warnopt (),
"%<%.*s%> directive output of %wu bytes exceeds "
"minimum required size of 4095",
- dirlen, dir.beg, fmtres.range.min);
+ dirlen, target_to_host (dir.beg), fmtres.range.min);
else
{
const char *fmtstr
@@ -2715,7 +2828,7 @@ format_directive (const pass_sprintf_length::call_
warned = fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dirlen, dir.beg,
+ dirlen, target_to_host (dir.beg),
fmtres.range.min, fmtres.range.max);
}
}
@@ -2744,7 +2857,7 @@ format_directive (const pass_sprintf_length::call_
warned = fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
"%<%.*s%> directive output of %wu bytes causes "
"result to exceed %<INT_MAX%>",
- dirlen, dir.beg, fmtres.range.min);
+ dirlen, target_to_host (dir.beg), fmtres.range.min);
else
{
const char *fmtstr
@@ -2755,7 +2868,7 @@ format_directive (const pass_sprintf_length::call_
"bytes may cause result to exceed %<INT_MAX%>"));
warned = fmtwarn (dirloc, pargrange, NULL,
info.warnopt (), fmtstr,
- dirlen, dir.beg,
+ dirlen, target_to_host (dir.beg),
fmtres.range.min, fmtres.range.max);
}
}
@@ -2847,7 +2960,7 @@ parse_directive (pass_sprintf_length::call_info &i
directive &dir, format_result *res,
const char *str, unsigned *argno)
{
- const char *pcnt = strchr (str, '%');
+ const char *pcnt = strchr (str, target_percent);
dir.beg = str;
if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
@@ -2889,7 +3002,7 @@ parse_directive (pass_sprintf_length::call_info &i
For vararg functions set to void_node. */
tree star_precision = NULL_TREE;
- if (ISDIGIT (*pf))
+ if (ISDIGIT (target_to_host (*pf)))
{
/* This could be either a POSIX positional argument, the '0'
flag, or a width, depending on what follows. Store it as
@@ -2896,10 +3009,10 @@ parse_directive (pass_sprintf_length::call_info &i
width and sort it out later after the next character has
been seen. */
char *end;
- width = strtol (pf, &end, 10);
+ width = target_strtol10 (pf, &end);
pf = end;
}
- else if ('*' == *pf)
+ else if (target_to_host (*pf) == '*')
{
/* Similarly to the block above, this could be either a POSIX
positional argument or a width, depending on what follows. */
@@ -2910,7 +3023,7 @@ parse_directive (pass_sprintf_length::call_info &i
++pf;
}
- if (*pf == '$')
+ if (target_to_host (*pf) == '$')
{
/* Handle the POSIX dollar sign which references the 1-based
positional argument number. */
@@ -2959,7 +3072,7 @@ parse_directive (pass_sprintf_length::call_info &i
the next field is the optional flags followed by an optional
width. */
for ( ; ; ) {
- switch (*pf)
+ switch (target_to_host (*pf))
{
case ' ':
case '0':
@@ -2966,7 +3079,7 @@ parse_directive (pass_sprintf_length::call_info &i
case '+':
case '-':
case '#':
- dir.set_flag (*pf++);
+ dir.set_flag (target_to_host (*pf++));
break;
default:
@@ -2975,13 +3088,13 @@ parse_directive (pass_sprintf_length::call_info &i
}
start_width:
- if (ISDIGIT (*pf))
+ if (ISDIGIT (target_to_host (*pf)))
{
char *end;
- width = strtol (pf, &end, 10);
+ width = target_strtol10 (pf, &end);
pf = end;
}
- else if ('*' == *pf)
+ else if (target_to_host (*pf) == '*')
{
if (*argno < gimple_call_num_args (info.callstmt))
star_width = gimple_call_arg (info.callstmt, (*argno)++);
@@ -2993,7 +3106,7 @@ parse_directive (pass_sprintf_length::call_info &i
}
++pf;
}
- else if ('\'' == *pf)
+ else if (target_to_host (*pf) == '\'')
{
/* The POSIX apostrophe indicating a numeric grouping
in the current locale. Even though it's possible to
@@ -3005,17 +3118,17 @@ parse_directive (pass_sprintf_length::call_info &i
}
start_precision:
- if ('.' == *pf)
+ if (target_to_host (*pf) == '.')
{
++pf;
- if (ISDIGIT (*pf))
+ if (ISDIGIT (target_to_host (*pf)))
{
char *end;
- precision = strtol (pf, &end, 10);
+ precision = target_strtol10 (pf, &end);
pf = end;
}
- else if ('*' == *pf)
+ else if (target_to_host (*pf) == '*')
{
if (*argno < gimple_call_num_args (info.callstmt))
star_precision = gimple_call_arg (info.callstmt, (*argno)++);
@@ -3035,10 +3148,10 @@ parse_directive (pass_sprintf_length::call_info &i
}
}
- switch (*pf)
+ switch (target_to_host (*pf))
{
case 'h':
- if (pf[1] == 'h')
+ if (target_to_host (pf[1]) == 'h')
{
++pf;
dir.modifier = FMT_LEN_hh;
@@ -3059,7 +3172,7 @@ parse_directive (pass_sprintf_length::call_info &i
break;
case 'l':
- if (pf[1] == 'l')
+ if (target_to_host (pf[1]) == 'l')
{
++pf;
dir.modifier = FMT_LEN_ll;
@@ -3080,7 +3193,7 @@ parse_directive (pass_sprintf_length::call_info &i
break;
}
- switch (*pf)
+ switch (target_to_host (*pf))
{
/* Handle a sole '%' character the same as "%%" but since it's
undefined prevent the result from being folded. */
@@ -3141,7 +3254,7 @@ parse_directive (pass_sprintf_length::call_info &i
return 0;
}
- dir.specifier = *pf++;
+ dir.specifier = target_to_host (*pf++);
if (star_width)
{
@@ -3708,6 +3821,9 @@ pass_sprintf_length::handle_gimple_call (gimple_st
unsigned int
pass_sprintf_length::execute (function *fun)
{
+ if (!target_to_host_charmap ['%'])
+ init_target_to_host_charmap ();
+
basic_block bb;
FOR_EACH_BB_FN (bb, fun)
{
Index: gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (working copy)
@@ -0,0 +1,110 @@
+/* PR tree-optimization/80523 - -Wformat-overflow doesn't consider
+ -fexec-charset
+ { dg-do compile }
+ { dg-require-iconv "IBM1047" }
+ { dg-options "-O2 -Wall -Wno-format -Wformat-overflow -fexec-charset=IBM1047 -ftrack-macro-expansion=0" } */
+
+char buf[1];
+void sink (void*);
+
+#define T(...) (__builtin_sprintf (buf + 1, __VA_ARGS__), sink (buf))
+
+/* Exercise all special C and POSIX characters. */
+
+void test_characters ()
+{
+ T ("%%"); /* { dg-warning ".%%. directive writing 1 byte" } */
+
+ T ("%A", 0.0); /* { dg-warning ".%A. directive writing between 6 and 20 " } */
+ T ("%a", 0.0); /* { dg-warning ".%a. directive writing between 6 and 20 " } */
+
+ T ("%C", 'a'); /* { dg-warning ".%C. directive writing 1 byte" "bug 80537" { xfail *-*-* } } */
+ T ("%c", 'a'); /* { dg-warning ".%c. directive writing 1 byte" } */
+
+ T ("%d", 12); /* { dg-warning ".%d. directive writing 2 bytes" } */
+ T ("% d", 12); /* { dg-warning ".% d. directive writing 3 bytes" } */
+ T ("%-d", 123); /* { dg-warning ".%-d. directive writing 3 bytes" } */
+ T ("%+d", 1234); /* { dg-warning ".%\\+d. directive writing 5 bytes" } */
+ T ("%'d", 1234); /* { dg-warning ".%'d. directive writing 5 bytes" "bug 80535" { xfail *-*-* } } */
+ T ("%1$d", 2345); /* { dg-warning ".%1\\\$d. directive writing 4 bytes" } */
+
+ /* Verify that digits are correctly interpreted as width and precision. */
+ T ("%0d", 12345); /* { dg-warning ".%0d. directive writing 5 bytes" } */
+ T ("%1d", 12345); /* { dg-warning ".%1d. directive writing 5 bytes" } */
+ T ("%2d", 12345); /* { dg-warning ".%2d. directive writing 5 bytes" } */
+ T ("%3d", 12345); /* { dg-warning ".%3d. directive writing 5 bytes" } */
+ T ("%4d", 12345); /* { dg-warning ".%4d. directive writing 5 bytes" } */
+ T ("%5d", 12345); /* { dg-warning ".%5d. directive writing 5 bytes" } */
+ T ("%6d", 12345); /* { dg-warning ".%6d. directive writing 6 bytes" } */
+ T ("%7d", 12345); /* { dg-warning ".%7d. directive writing 7 bytes" } */
+ T ("%8d", 12345); /* { dg-warning ".%8d. directive writing 8 bytes" } */
+ T ("%9d", 12345); /* { dg-warning ".%9d. directive writing 9 bytes" } */
+
+ T ("%.0d", 12345); /* { dg-warning ".%.0d. directive writing 5 bytes" } */
+ T ("%.1d", 12345); /* { dg-warning ".%.1d. directive writing 5 bytes" } */
+ T ("%.2d", 12345); /* { dg-warning ".%.2d. directive writing 5 bytes" } */
+ T ("%.3d", 12345); /* { dg-warning ".%.3d. directive writing 5 bytes" } */
+ T ("%.4d", 12345); /* { dg-warning ".%.4d. directive writing 5 bytes" } */
+ T ("%.5d", 12345); /* { dg-warning ".%.5d. directive writing 5 bytes" } */
+ T ("%.6d", 12345); /* { dg-warning ".%.6d. directive writing 6 bytes" } */
+ T ("%.7d", 12345); /* { dg-warning ".%.7d. directive writing 7 bytes" } */
+ T ("%.8d", 12345); /* { dg-warning ".%.8d. directive writing 8 bytes" } */
+ T ("%.9d", 12345); /* { dg-warning ".%.9d. directive writing 9 bytes" } */
+
+ T ("%hhd", 12); /* { dg-warning ".%hhd. directive writing 2 bytes" } */
+ T ("%hd", 234); /* { dg-warning ".%hd. directive writing 3 bytes" } */
+
+ {
+ const __PTRDIFF_TYPE__ i = 3456;
+ T ("%jd", i); /* { dg-warning ".%jd. directive writing 4 bytes" } */
+ }
+
+ T ("%ld", 45678L); /* { dg-warning ".%ld. directive writing 5 bytes" } */
+
+ {
+ const __PTRDIFF_TYPE__ i = 56789;
+ T ("%td", i); /* { dg-warning ".%td. directive writing 5 bytes" } */
+ }
+
+ {
+ const __SIZE_TYPE__ i = 67890;
+ T ("%zd", i); /* { dg-warning ".%zd. directive writing 5 bytes" } */
+ }
+
+ T ("%E", 0.0); /* { dg-warning ".%E. directive writing 12 bytes" } */
+ T ("%e", 0.0); /* { dg-warning ".%e. directive writing 12 bytes" } */
+ T ("%F", 0.0); /* { dg-warning ".%F. directive writing 8 bytes" } */
+ T ("%f", 0.0); /* { dg-warning ".%f. directive writing 8 bytes" } */
+ T ("%G", 0.0); /* { dg-warning ".%G. directive writing 1 byte" } */
+ T ("%g", 0.0); /* { dg-warning ".%g. directive writing 1 byte" } */
+
+ T ("%i", 123); /* { dg-warning ".%i. directive writing 3 bytes" } */
+
+ {
+ int n;
+
+ T ("%n", &n); /* { dg-warning "writing a terminating nul" } */
+ T ("%nH", &n); /* { dg-warning ".H. directive writing 1 byte" } */
+ }
+
+ T ("%o", 999); /* { dg-warning ".%o. directive writing 4 bytes" } */
+ T ("%#o", 999); /* { dg-warning ".%#o. directive writing 5 bytes" } */
+
+ T ("%x", 1234); /* { dg-warning ".%x. directive writing 3 bytes" } */
+ T ("%#X", 1235); /* { dg-warning ".%#X. directive writing 5 bytes" } */
+
+ T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" } */
+ T ("%-s", "1"); /* { dg-warning ".%-s. directive writing 1 byte" } */
+
+ /* Verify that characters in the source character set appear in
+ the text of the warning unchanged (i.e., not as their equivalents
+ in the execution character set on the target). The trailing %%
+ disables sprintf->strcpy optimization. */
+ T ("ABCDEFGHIJ%%"); /* { dg-warning ".ABCDEFGHIJ. directive writing 10 bytes" } */
+ T ("KLMNOPQRST%%"); /* { dg-warning ".KLMNOPQRST. directive writing 10 bytes" } */
+ T ("UVWXYZ%%"); /* { dg-warning ".UVWXYZ. directive writing 6 bytes" } */
+
+ T ("abcdefghij%%"); /* { dg-warning ".abcdefghij. directive writing 10 bytes" } */
+ T ("klmnopqrst%%"); /* { dg-warning ".klmnopqrst. directive writing 10 bytes" } */
+ T ("uvwxyz%%"); /* { dg-warning ".uvwxyz. directive writing 6 bytes" } */
+}