This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] implement -Wformat-diag, v2
- From: Martin Sebor <msebor at gmail dot com>
- To: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 22 May 2019 10:42:48 -0600
- Subject: [PATCH] implement -Wformat-diag, v2
Attached is a revised implementation of the -Wformat-diag checker
incorporating the feedback I got on the first revision.
Martin
gcc/c-family/ChangeLog:
* c-format.c (function_format_info::format_type): Adjust type.
(function_format_info::is_raw): New member.
(decode_format_type): Adjust signature. Handle "raw" diag attributes.
(decode_format_attr): Adjust call to decode_format_type.
Avoid a redundant call to convert_format_name_to_system_name.
Avoid abbreviating the word "arguments" in a diagnostic.
(format_warning_substr): New function.
(avoid_dollar_number): Quote dollar sign in a diagnostic.
(finish_dollar_format_checking): Same.
(check_format_info): Same.
(struct baltoks_t): New.
(c_opers, c_keywords, cxx_keywords, badwords, contrs): New arrays.
(maybe_diag_unbalanced_tokens, check_tokens, check_plain): New
functions.
(check_format_info_main): Call check_plain. Use baltoks_t. Call
maybe_diag_unbalanced_tokens.
(handle_format_attribute): Spell out the word "arguments" in
a diagnostic.
gcc/testsuite/ChangeLog:
* gcc.dg/format/gcc_diag-11.c: New test.
diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index a7f76c1c01d..713fce442c9 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -52,7 +52,13 @@ enum format_type { printf_format_type, asm_fprintf_format_type,
struct function_format_info
{
- int format_type; /* type of format (printf, scanf, etc.) */
+ enum format_type format_type; /* type of format (printf, scanf, etc.) */
+ /* IS_RAW is relevant only for GCC diagnostic format functions.
+ It is set for "raw" formatting functions like pp_printf that
+ are not intended to produce complete diagnostics according to
+ GCC guidelines, and clear for others like error and warning
+ whose format string is checked for proper quoting and spelling. */
+ bool is_raw;
unsigned HOST_WIDE_INT format_num; /* number of format argument */
unsigned HOST_WIDE_INT first_arg_num; /* number of first arg (zero for varargs) */
};
@@ -65,7 +71,7 @@ static GTY(()) tree locus;
static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
bool);
-static int decode_format_type (const char *);
+static format_type decode_format_type (const char *, bool * = NULL);
static bool check_format_string (const_tree argument,
unsigned HOST_WIDE_INT format_num,
@@ -111,6 +117,32 @@ format_warning_at_char (location_t fmt_string_loc, tree format_string_cst,
return warned;
}
+
+/* Issue a warning in the manner of format_warning_va, with a substring_loc
+   built for the characters at offsets POS1 through POS2 - 1 of the string
+   constant FORMAT_STRING_CST located at FMT_STRING_LOC.  OPT is the
+   controlling option and GMSGID the diagnostic format, followed by its
+   arguments.  Returns the result of emit_warning_va.  */
+
+ATTRIBUTE_GCC_DIAG (6,7)
+static bool
+format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
+                       int pos1, int pos2, int opt, const char *gmsgid, ...)
+{
+  /* POS2 is one past the last character of interest; the location is
+     constructed from the index of that last character.  */
+  const int last = pos2 - 1;
+
+  substring_loc fmt_loc (fmt_string_loc, TREE_TYPE (format_string_cst),
+                         pos1, pos1, last);
+  format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
+                                   NULL);
+
+  va_list ap;
+  va_start (ap, gmsgid);
+  const bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
+  va_end (ap);
+
+  return warned;
+}
+
+
/* Check that we have a pointer to a string suitable for use as a format.
The default is to check for a char type.
For objective-c dialects, this is extended to include references to string
@@ -320,10 +352,8 @@ decode_format_attr (const_tree fntype, tree atname, tree args,
{
const char *p = IDENTIFIER_POINTER (format_type_id);
- p = convert_format_name_to_system_name (p);
+ info->format_type = decode_format_type (p, &info->is_raw);
- info->format_type = decode_format_type (p);
-
if (!c_dialect_objc ()
&& info->format_type == gcc_objc_string_format_type)
{
@@ -359,7 +389,7 @@ decode_format_attr (const_tree fntype, tree atname, tree args,
if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
{
gcc_assert (!validated_p);
- error ("format string argument follows the args to be formatted");
+ error ("format string argument follows the arguments to be formatted");
return false;
}
@@ -1067,27 +1097,55 @@ static void format_type_warning (const substring_loc &fmt_loc,
char conversion_char);
/* Decode a format type from a string, returning the type, or
- format_type_error if not valid, in which case the caller should print an
- error message. */
-static int
-decode_format_type (const char *s)
+ format_type_error if not valid, in which case the caller should
+ print an error message. On success, when IS_RAW is non-null, set
+ *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
+ formatting function and clear it otherwise. */
+static format_type
+decode_format_type (const char *s, bool *is_raw /* = NULL */)
{
- int i;
- int slen;
+ bool is_raw_buf;
+
+ if (!is_raw)
+ is_raw = &is_raw_buf;
+
+ *is_raw = false;
s = convert_format_name_to_system_name (s);
- slen = strlen (s);
- for (i = 0; i < n_format_types; i++)
+
+ size_t slen = strlen (s);
+ for (int i = 0; i < n_format_types; i++)
{
- int alen;
+ /* Check for a match with no underscores. */
if (!strcmp (s, format_types[i].name))
- return i;
- alen = strlen (format_types[i].name);
+ return static_cast<format_type> (i);
+
+ /* Check for leading and trailing underscores. */
+ size_t alen = strlen (format_types[i].name);
if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
&& s[slen - 1] == '_' && s[slen - 2] == '_'
&& !strncmp (s + 2, format_types[i].name, alen))
- return i;
+ return static_cast<format_type>(i);
+
+ /* Check for the "_raw" suffix and no leading underscores. */
+ if (slen == alen + 4
+ && !strncmp (s, format_types[i].name, alen)
+ && !strcmp (s + alen, "_raw"))
+ {
+ *is_raw = true;
+ return static_cast<format_type>(i);
+ }
+
+ /* Check for the "_raw__" suffix and leading underscores. */
+ if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
+ && !strncmp (s + 2, format_types[i].name, alen)
+ && !strcmp (s + 2 + alen, "_raw__"))
+ {
+ *is_raw = true;
+ return static_cast<format_type>(i);
+ }
}
+
return format_type_error;
}
@@ -1350,7 +1408,8 @@ avoid_dollar_number (const char *format)
format++;
if (*format == '$')
{
- warning (OPT_Wformat_, "$ operand number used after format without operand number");
+ warning (OPT_Wformat_,
+ "%<$%> operand number used after format without operand number");
return true;
}
return false;
@@ -1381,7 +1440,8 @@ finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok)
found_pointer_gap = true;
else
warning_at (res->format_string_loc, OPT_Wformat_,
- "format argument %d unused before used argument %d in $-style format",
+ "format argument %d unused before used argument %d "
+ "in %<$%>-style format",
i + 1, dollar_max_arg_used);
}
}
@@ -1525,7 +1585,8 @@ check_format_info (function_format_info *info, tree params,
}
if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
&& res.number_other == 0)
- warning_at (loc, OPT_Wformat_extra_args, "unused arguments in $-style format");
+ warning_at (loc, OPT_Wformat_extra_args,
+ "unused arguments in %<$%>-style format");
if (res.number_empty > 0 && res.number_non_literal == 0
&& res.number_other == 0)
warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
@@ -2789,6 +2850,904 @@ check_argument_type (const format_char_info *fci,
return true;
}
+/* Describes "paired tokens" within the format string that are
+ expected to be balanced. */
+
+struct baltoks_t
+{
+ baltoks_t (): singlequote (), doublequote () { }
+
+ typedef auto_vec<const char *> balanced_tokens_t;
+ /* Vectors of pointers to opening brackets ('['), curly
+ brackets ('{'), quoting directives (like GCC "%<"), parentheses,
+ and angle brackets ('<'). Each vector is used as a stack: an
+ opening token is pushed and popped again when its closing
+ counterpart is seen. Used to detect unbalanced tokens. */
+ balanced_tokens_t brackets;
+ balanced_tokens_t curly;
+ balanced_tokens_t quotdirs;
+ balanced_tokens_t parens;
+ balanced_tokens_t pointy;
+ /* Pointers to the last opening single and double quote that has
+ not been closed yet, or null when there is none. */
+ const char *singlequote;
+ const char *doublequote;
+};
+
+/* Describes a keyword, operator, or other name. */
+
+struct token_t
+{
+ const char *name; /* Keyword/operator name. */
+ unsigned char len; /* Length of NAME, excluding the terminating NUL. */
+ const char *alt; /* Alternate spelling to suggest, or NULL if none. */
+};
+
+/* Helper for initializing global token_t arrays below. */
+#define NAME(name) { name, sizeof name - 1, NULL }
+
+/* C/C++ operators that are expected to be quoted within the format
+ string. check_tokens reports the first element that matches the
+ text, so an operator that has another element as a prefix (such
+ as "<<=" and ">>=") must be listed before that prefix or it can
+ never be reported under its own name. */
+
+static const token_t c_opers[] =
+ {
+ NAME ("!="), NAME ("%="), NAME ("&&"), NAME ("&="), NAME ("*="),
+ NAME ("++"), NAME ("+="), NAME ("--"), NAME ("-="), NAME ("->"),
+ NAME ("/="), NAME ("<<="), NAME ("<<"), NAME ("<="), NAME ("=="),
+ NAME (">="), NAME (">>="), NAME (">>"), NAME ("?:"), NAME ("^="),
+ NAME ("|="), NAME ("||")
+ };
+
+/* C++-only operators that are expected to be quoted within the format
+ string. check_plain consults this array only when c_dialect_cxx ()
+ is true, and only after c_opers found nothing to report. */
+
+static const token_t cxx_opers[] =
+ {
+ NAME ("->*"), NAME (".*"), NAME ("::"), NAME ("<=>")
+ };
+
+/* Common C/C++ keywords that are expected to be quoted within the format
+ string. Keywords like auto, inline, or volatile are excluded because
+ they are sometimes used in common terms like /auto variables/, /inline
+ function/, or /volatile access/ where they should not be quoted.
+ Names ending in " %" match the keyword only when it is followed by
+ a space and a format directive; check_tokens trims the name back to
+ its last alphabetic character before reporting it. */
+
+static const token_t c_keywords[] =
+ {
+#undef NAME
+#define NAME(name, alt) { name, sizeof name - 1, alt }
+
+ NAME ("alignas", NULL),
+ NAME ("alignof", NULL),
+ NAME ("asm", NULL),
+ NAME ("bool", NULL),
+ NAME ("char", NULL),
+ NAME ("const %", NULL),
+ NAME ("const-qualified", "%<const%>-qualified"),
+ NAME ("float", NULL),
+ NAME ("ifunc", NULL),
+ NAME ("int", NULL),
+ NAME ("long double", NULL),
+ NAME ("long int", NULL),
+ NAME ("long long", NULL),
+ NAME ("malloc", NULL),
+ NAME ("noclone", NULL),
+ NAME ("noinline", NULL),
+ NAME ("nonnull", NULL),
+ NAME ("noreturn", NULL),
+ NAME ("nothrow", NULL),
+ NAME ("offsetof", NULL),
+ NAME ("readonly", "read-only"),
+ NAME ("readwrite", "read-write"),
+ NAME ("restrict %", NULL),
+ NAME ("restrict-qualified", "%<restrict%>-qualified"),
+ NAME ("short int", NULL),
+ NAME ("signed char", NULL),
+ NAME ("signed int", NULL),
+ NAME ("signed long", NULL),
+ NAME ("signed short", NULL),
+ NAME ("sizeof", NULL),
+ NAME ("typeof", NULL),
+ NAME ("unsigned char", NULL),
+ NAME ("unsigned int", NULL),
+ NAME ("unsigned long", NULL),
+ NAME ("unsigned short", NULL),
+ NAME ("volatile %", NULL),
+ NAME ("volatile-qualified", "%<volatile%>-qualified"),
+ NAME ("weakref", NULL),
+ };
+
+static const token_t cxx_keywords[] =
+ {
+ /* C++-only keywords and operator function names. check_plain
+ consults this array only when c_dialect_cxx () is true. The
+ longer "constexpr if" comes before "constexpr" because the first
+ matching element is the one that gets reported. */
+ NAME ("catch", NULL),
+ NAME ("constexpr if", NULL),
+ NAME ("constexpr", NULL),
+ NAME ("consteval", NULL),
+ NAME ("decltype", NULL),
+ NAME ("nullptr", NULL),
+ NAME ("operator delete", NULL),
+ NAME ("operator new", NULL),
+ NAME ("typeid", NULL),
+ NAME ("typeinfo", NULL)
+ };
+
+/* Disallowed words such as misspellings that should be avoided in favor
+ of the specified alternatives. check_tokens compares them without
+ regard to case, also accepts a trailing 's' (plural), and reports
+ the first element that matches, so a longer phrase must precede
+ any element that is its prefix (e.g., "commandline option" comes
+ before "commandline"). */
+static const struct
+{
+ const char *name; /* Bad word or contraction. */
+ unsigned char len; /* Its length. */
+ const char *alt; /* Preferred alternative. */
+} badwords[] =
+ {
+ NAME ("arg", "argument"),
+ NAME ("bitfield", "bit-field"),
+ NAME ("builtin function", "built-in function"),
+ NAME ("can not", "cannot"),
+ NAME ("commandline option", "command-line option"),
+ NAME ("commandline", "command line"),
+ NAME ("command line option", "command-line option"),
+ NAME ("decl", "declaration"),
+ NAME ("enumeral", "enumerated"),
+ NAME ("floating point", "floating-point"),
+ NAME ("non-zero", "nonzero"),
+ NAME ("reg", "register"),
+ NAME ("stmt", "statement"),
+ };
+
+/* Common contractions that should be avoided in favor of the specified
+ alternatives. Every name must contain an apostrophe: check_plain
+ locates it with strchr and asserts that it is found. */
+
+static const struct
+{
+ const char *name; /* Bad word or contraction. */
+ unsigned char len; /* Its length. */
+ const char *alt; /* Preferred alternative. */
+} contrs[] =
+ {
+ NAME ("can't", "cannot"),
+ NAME ("didn't", "did not"),
+ /* These are commonly abused. Avoid diagnosing them for now.
+ NAME ("isn't", "is not"),
+ NAME ("don't", "do not"),
+ */
+ NAME ("mustn't", "must not"),
+ NAME ("needn't", "need not"),
+ NAME ("shouldn't", "should not"),
+ NAME ("that's", "that is"),
+ NAME ("there's", "there is"),
+ NAME ("they're", "they are"),
+ NAME ("what's", "what is"),
+ NAME ("won't", "will not")
+ };
+
+/* Check for unquoted TOKENS, an array of NTOKS elements that is
+ searched in order; the first matching element is the one reported.
+ FORMAT_STRING_LOC is the location of
+ the format string, FORMAT_STRING_CST the format string itself (as
+ a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to
+ the beginning of the format string and the character currently
+ being processed, and BALTOKS describes paired "tokens" within
+ the format string that are expected to be balanced.
+ When no element of TOKENS matches, also diagnoses unquoted
+ __attribute__ (pushing and popping its parentheses on BALTOKS),
+ unquoted __atomic, __builtin, and __sync names, identifiers that
+ contain an underscore, and the misspelled terms in BADWORDS.
+ Returns a pointer to the last processed character or null when
+ nothing was done. */
+
+static const char*
+check_tokens (const token_t *tokens, unsigned ntoks,
+ location_t format_string_loc, tree format_string_cst,
+ const char *orig_format_chars, const char *format_chars,
+ baltoks_t &baltoks)
+{
+ /* For brevity. */
+ const int opt = OPT_Wformat_diag;
+ /* Zero-based starting position of a problem sequence. */
+ int fmtchrpos = format_chars - orig_format_chars;
+
+ /* For identifier-like "words," set to the word length. */
+ unsigned wlen = 0;
+ /* Set for an operator, clear for an identifier/word. */
+ bool is_oper = false;
+ bool underscore = false;
+
+ if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
+ {
+ while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
+ {
+ underscore |= format_chars[wlen] == '_';
+ ++wlen;
+ }
+ }
+ else
+ is_oper = true;
+
+ for (unsigned i = 0; i != ntoks; ++i)
+ {
+ unsigned toklen = tokens[i].len;
+
+ if (toklen < wlen
+ || strncmp (format_chars, tokens[i].name, toklen))
+ continue;
+
+ if (toklen == 2
+ && format_chars - orig_format_chars > 0
+ && (TOUPPER (format_chars[-1]) == 'C'
+ || TOUPPER (format_chars[-1]) == 'G'))
+ return format_chars + toklen - 1; /* Reference to C++ or G++. */
+
+ if (ISPUNCT (format_chars[toklen - 1]))
+ {
+ if (format_chars[toklen - 1] == format_chars[toklen])
+ return NULL; /* Operator followed by another punctuator. */
+ }
+ else if (ISALNUM (format_chars[toklen]))
+ return NULL; /* Keyword prefix for a longer word. */
+
+ if (toklen == 2
+ && format_chars[0] == '-'
+ && format_chars[1] == '-'
+ && ISALNUM (format_chars[2]))
+ return NULL; /* Probably option like --help. */
+
+ /* Allow this ugly warning for the time being. */
+ if (toklen == 2
+ && format_chars - orig_format_chars > 6
+ && !strncmp (format_chars - 7, " count >= width of ", 19))
+ return format_chars + 10;
+
+ /* The token is a type name if it ends in an alphabetic character
+ and consists of more than one word (i.e., contains a space). */
+ bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
+ && strchr (tokens[i].name, ' '));
+
+ /* Backtrack to the last alphabetic character (for tokens whose
+ names ends in '%'). */
+ if (!is_oper)
+ while (!ISALPHA (tokens[i].name[toklen - 1]))
+ --toklen;
+
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + toklen, opt,
+ (is_type
+ ? G_("unquoted type name %<%.*s%> in format")
+ : (is_oper
+ ? G_("unquoted operator %<%.*s%> in format")
+ : G_("unquoted keyword %<%.*s%> in format"))),
+ toklen, format_chars)
+ && tokens[i].alt)
+ inform (format_string_loc, "use %qs instead", tokens[i].alt);
+
+ return format_chars + toklen - 1;
+ }
+
+ /* Diagnose unquoted __attribute__. Consider any parenthesized
+ argument to the attribute to avoid redundant warnings for
+ the double parentheses that might follow. */
+ if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1))
+ {
+ unsigned nchars = sizeof "__attribute" - 1;
+ while ('_' == format_chars[nchars])
+ ++nchars;
+
+ for (int i = nchars; format_chars[i]; ++i)
+ if (' ' != format_chars[i])
+ {
+ nchars = i;
+ break;
+ }
+
+ if (format_chars[nchars] == '(')
+ {
+ baltoks.parens.safe_push (format_chars + nchars);
+
+ ++nchars;
+ bool close = false;
+ if (format_chars[nchars] == '(')
+ {
+ baltoks.parens.safe_push (format_chars + nchars);
+ close = true;
+ ++nchars;
+ }
+ for (int i = nchars; format_chars[i]; ++i)
+ if (')' == format_chars[i])
+ {
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ nchars = i + 1;
+ break;
+ }
+
+ if (close && format_chars[nchars] == ')')
+ {
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ ++nchars;
+ }
+ }
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted attribute in format");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose unquoted built-ins. */
+ if (format_chars[0] == '_'
+ && format_chars[1] == '_'
+ && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1)
+ || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1)
+ || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1)))
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + wlen, opt,
+ "unquoted name of built-in function %<%.*s%> "
+ "in format",
+ wlen, format_chars);
+ return format_chars + wlen - 1;
+ }
+
+ /* Diagnose unquoted substrings of alphanumeric characters containing
+ underscores. They most likely refer to identifiers and should be
+ quoted. */
+ if (underscore)
+ format_warning_substr (format_string_loc, format_string_cst,
+ format_chars - orig_format_chars,
+ format_chars + wlen - orig_format_chars,
+ opt,
+ "unquoted identifier or keyword %<%.*s%> in format",
+ wlen, format_chars);
+ else
+ {
+ /* Diagnose some common misspellings. */
+ for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
+ {
+ unsigned badwlen = strspn (badwords[i].name, " -");
+ if (wlen >= badwlen
+ && (wlen <= badwords[i].len
+ || (wlen == badwords[i].len + 1U
+ && TOUPPER (format_chars[wlen - 1]) == 'S'))
+ && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
+ {
+ /* Handle singular as well as plural forms of all bad words
+ even though the latter don't necessarily make sense for
+ all of the former (like "can nots"). */
+ badwlen = badwords[i].len;
+ const char *plural = "";
+ if (TOUPPER (format_chars[badwlen]) == 'S')
+ {
+ ++badwlen;
+ plural = "s";
+ }
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + badwords[i].len,
+ opt,
+ "misspelled term %<%.*s%> in format; "
+ "use %<%s%s%> instead",
+ badwlen, format_chars,
+ badwords[i].alt, plural);
+
+ return format_chars + badwords[i].len - 1;
+ }
+ }
+
+ /* Skip C++/G++. */
+ if (!strncasecmp (format_chars, "c++", 3)
+ || !strncasecmp (format_chars, "g++", 3))
+ return format_chars + 2;
+ }
+
+ return wlen ? format_chars + wlen - 1 : NULL;
+}
+
+/* Check plain text in a format string of a GCC diagnostic function
+ for common quoting, punctuation, and spelling mistakes, and issue
+ -Wformat-diag warnings if they are found. FORMAT_STRING_LOC is
+ the location of the format string, FORMAT_STRING_CST the format
+ string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
+ pointers to the beginning of the format string and the character
+ currently being processed, and BALTOKS describes paired "tokens"
+ within the format string that are expected to be balanced.
+ Returns a pointer to the last processed character. */
+
+static const char*
+check_plain (location_t format_string_loc, tree format_string_cst,
+ const char *orig_format_chars, const char *format_chars,
+ baltoks_t &baltoks)
+{
+ /* For brevity. */
+ const int opt = OPT_Wformat_diag;
+ /* Zero-based starting position of a problem sequence. */
+ int fmtchrpos = format_chars - orig_format_chars;
+
+ if (*format_chars == '%')
+ {
+ /* Diagnose %<%s%> and suggest using %qs instead. */
+ if (!strncmp (format_chars, "%<%s%>", 6))
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + 6, opt,
+ "quoted %qs directive in format; "
+ "use %qs instead", "%s", "%qs");
+ /* Diagnose the contraction spelled with the apostrophe directive.
+ Compare all six characters of "can%'t" and make sure the word
+ ends after the 't' (FORMAT_CHARS[3]) so that longer words and
+ other suffixes such as "American%'s" are not diagnosed. */
+ else if (format_chars - orig_format_chars > 2
+ && !strncasecmp (format_chars - 3, "can%'t", 6)
+ && !ISALPHA (format_chars[3]))
+ format_warning_substr (format_string_loc,
+ format_string_cst,
+ fmtchrpos - 3, fmtchrpos + 3, opt,
+ "contraction %<%.*s%> in format; "
+ "use %qs instead",
+ 6, format_chars - 3, "cannot");
+
+ return format_chars;
+ }
+
+ if (baltoks.quotdirs.length ())
+ {
+ /* Skip over all plain text within a quoting directive until
+ the next directive. */
+ while (*format_chars && '%' != *format_chars)
+ ++format_chars;
+
+ return format_chars;
+ }
+
+ /* The length of the problem sequence. */
+ int nchars = 0;
+
+ /* Diagnose any whitespace characters other than <space> but only
+ leading, trailing, and two or more consecutive <space>s. Do
+ this before diagnosing control characters because whitespace
+ is a subset of controls. */
+ const char *other_than_space = NULL;
+ while (ISSPACE (format_chars[nchars]))
+ {
+ if (format_chars[nchars] != ' ' && !other_than_space)
+ other_than_space = format_chars + nchars;
+ ++nchars;
+ }
+
+ if (nchars)
+ {
+ /* This is the most common problem: go the extra mile to describe
+ the problem in as much helpful detail as possible. */
+ if (other_than_space)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted whitespace character %qc in format",
+ *other_than_space);
+ return format_chars + nchars - 1;
+ }
+
+ if (fmtchrpos == 0)
+ /* Accept strings of leading spaces with no warning. */
+ return format_chars + nchars - 1;
+
+ if (!format_chars[nchars])
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious trailing space in format");
+ return format_chars + nchars - 1;
+ }
+
+ if (nchars > 1)
+ {
+ if (nchars == 2
+ && orig_format_chars < format_chars
+ && format_chars[-1] == '.'
+ && format_chars[0] == ' '
+ && format_chars[1] == ' ')
+ {
+ /* A period followed by two spaces. */
+ if (ISUPPER (*orig_format_chars))
+ {
+ /* If the part before the period is a capitalized
+ sentence check to make sure that what follows
+ is also capitalized. */
+ if (ISLOWER (format_chars[2]))
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "inconsistent capitalization in "
+ "format");
+ }
+ }
+ else
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted sequence of %i consecutive "
+ "space characters in format", nchars);
+ return format_chars + nchars - 1;
+ }
+
+ format_chars += nchars;
+ nchars = 0;
+ }
+
+ fmtchrpos = format_chars - orig_format_chars;
+
+ /* Diagnose any unquoted control characters other than the terminating
+ NUL. */
+ while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
+ ++nchars;
+
+ if (nchars > 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted control characters in format");
+ return format_chars + nchars - 1;
+ }
+ if (nchars)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted control character %qc in format",
+ *format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ if (ISPUNCT (format_chars[0]))
+ {
+ size_t nelts = sizeof c_opers / sizeof *c_opers;
+ if (const char *ret = check_tokens (c_opers, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+
+ nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
+ if (const char *ret = check_tokens (cxx_opers, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+ }
+
+ if (ISALPHA (format_chars[0]))
+ {
+ size_t nelts = sizeof c_keywords / sizeof *c_keywords;
+ if (const char *ret = check_tokens (c_keywords, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+
+ nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
+ if (const char *ret = check_tokens (cxx_keywords, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+ }
+
+ nchars = 0;
+
+ /* Diagnose unquoted options. */
+ if ((format_chars == orig_format_chars
+ || format_chars[-1] == ' ')
+ && format_chars[0] == '-'
+ && ((format_chars[1] == '-'
+ && ISALPHA (format_chars[2]))
+ || ISALPHA (format_chars[1])))
+ {
+ nchars = 1;
+ while (ISALNUM (format_chars[nchars])
+ || '_' == format_chars[nchars]
+ || '-' == format_chars[nchars]
+ || '+' == format_chars[nchars])
+ ++nchars;
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted option name %<%.*s%> in format",
+ nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose leading, trailing, and two or more consecutive punctuation
+ characters. */
+ const char *unbalanced = NULL;
+ while ('%' != format_chars[nchars]
+ && ISPUNCT (format_chars[nchars])
+ && !unbalanced)
+ {
+ switch (format_chars[nchars])
+ {
+ case '[':
+ baltoks.brackets.safe_push (format_chars + nchars);
+ break;
+ case '{':
+ baltoks.curly.safe_push (format_chars + nchars);
+ break;
+ case '(':
+ baltoks.parens.safe_push (format_chars + nchars);
+ break;
+ case '<':
+ baltoks.pointy.safe_push (format_chars + nchars);
+ break;
+
+ case ']':
+ if (baltoks.brackets.length () > 0)
+ baltoks.brackets.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case '}':
+ if (baltoks.curly.length () > 0)
+ baltoks.curly.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case ')':
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case '>':
+ if (baltoks.pointy.length () > 0)
+ baltoks.pointy.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ }
+
+ ++nchars;
+ }
+
+ if (unbalanced)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unbalanced punctuation character %qc in format",
+ *unbalanced);
+ return format_chars + nchars - 1;
+ }
+
+ if (nchars)
+ {
+ /* Consider any identifier that follows the pound ('#') sign
+ a preprocessing directive. */
+ if (nchars == 1
+ && format_chars[0] == '#'
+ && ISALPHA (format_chars[1]))
+ {
+ while (ISALNUM (format_chars[nchars])
+ || format_chars[nchars] == '_')
+ ++nchars;
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted preprocessing directive %<%.*s%> "
+ "in format", nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a bare single quote. */
+ if (nchars == 1
+ && format_chars[0] == '\''
+ && format_chars - orig_format_chars
+ && ISALPHA (format_chars[-1])
+ && ISALPHA (format_chars[1]))
+ {
+ /* Diagnose a subset of contractions that are best avoided. */
+ for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
+ {
+ const char *apos = strchr (contrs[i].name, '\'');
+ gcc_assert (apos != NULL);
+ int off = apos - contrs[i].name;
+
+ if (format_chars - orig_format_chars >= off
+ && !strncmp (format_chars - off,
+ contrs[i].name, contrs[i].len))
+ {
+ format_warning_substr (format_string_loc,
+ format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "contraction %<%.*s%> in format; "
+ "use %qs instead",
+ contrs[i].len, contrs[i].name,
+ contrs[i].alt);
+ return format_chars + nchars - 1;
+ }
+ }
+
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "bare apostrophe %<'%> in format"))
+ inform (format_string_loc,
+ "if avoiding the apostrophe is not feasible, enclose "
+ "it in a pair of %qs and %qs directives instead",
+ "%<", "%>");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a backtick (grave accent). */
+ if (nchars == 1
+ && format_chars[0] == '`')
+ {
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "grave accent %<`%> in format"))
+ inform (format_string_loc,
+ "use the apostrophe directive %qs instead", "%'");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a punctuation character after a space. */
+ if (nchars == 1
+ && format_chars - orig_format_chars
+ && format_chars[-1] == ' '
+ && strspn (format_chars, "!?:;.,") == 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos - 1, fmtchrpos, opt,
+ "space followed by punctuation character "
+ "%<%c%>", format_chars[0]);
+ return format_chars;
+ }
+
+ if (nchars == 1)
+ {
+ if (!strncmp (format_chars, "\"%s\"", 4))
+ {
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + 4, opt,
+ "quoted %qs directive in format",
+ "%s"))
+ inform (format_string_loc, "if using %qs is not feasible, "
+ "use %qs instead", "%qs", "\"%-s\"");
+ }
+
+ if (format_chars[0] == '"')
+ {
+ baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
+ return format_chars + nchars - 1;
+ }
+ if (format_chars[0] == '\'')
+ {
+ baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
+ return format_chars + nchars - 1;
+ }
+ }
+
+ if (fmtchrpos == 0)
+ {
+ if (nchars == 1
+ && format_chars[0] == '(')
+ ; /* Text beginning in an open parenthesis. */
+ else if (nchars == 3
+ && !strncmp (format_chars, "...", 3)
+ && format_chars[3])
+ ; /* Text beginning in an ellipsis. */
+ else
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious leading punctuation sequence "
+ "%<%.*s%> in format",
+ nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+ }
+ else if (!format_chars[nchars])
+ {
+ if (nchars == 1
+ && (format_chars[nchars - 1] == ':'
+ || format_chars[nchars - 1] == ')'))
+ ; /* Text ending in a colon or a closing parenthesis. */
+ else if (nchars == 1
+ && ((ISUPPER (*orig_format_chars)
+ && format_chars[nchars - 1] == '.')
+ || strspn (format_chars + nchars - 1, "?])") == 1))
+ ; /* Capitalized sentence terminated by a single period,
+ or text ending in a question mark, closing bracket,
+ or parenthesis. */
+ else if (nchars == 2
+ && format_chars[0] == '?'
+ && format_chars[1] == ')')
+ ; /* A question mark after a closing parenthetical note. */
+ else if (nchars == 2
+ && format_chars[0] == ')'
+ && (format_chars[1] == '?'
+ || format_chars[1] == ';'
+ || format_chars[1] == ':'
+ || (ISUPPER (*orig_format_chars)
+ && format_chars[1] == '.')))
+ ; /* Closing parenthetical note followed by a question mark,
+ semicolon, or colon at the end of the string, or by
+ a period at the end of a capitalized sentence. */
+ else if (nchars == 3
+ && format_chars - orig_format_chars > 0
+ && !strncmp (format_chars, "...", 3))
+ ; /* Text ending in the ellipsis. */
+ else
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious trailing punctuation sequence "
+ "%<%.*s%> in format",
+ nchars, format_chars);
+
+ return format_chars + nchars - 1;
+ }
+ else if (nchars == 2
+ && format_chars[0] == ')'
+ && (format_chars[1] == ':'
+ || format_chars[1] == ';'
+ || format_chars[1] == ',')
+ && format_chars[2] == ' ')
+ ; /* Closing parenthetical note followed by a colon, semicolon
+ or a comma followed by a space in the middle of the string. */
+ else if (nchars > 1)
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted sequence of %i consecutive "
+ "punctuation characters %q.*s in format",
+ nchars, nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ nchars = 0;
+
+ /* Finally, diagnose any unquoted non-graph, non-punctuation characters
+ other than the terminating NUL. */
+ while (format_chars[nchars]
+ && '%' != format_chars[nchars]
+ && !ISPUNCT (format_chars[nchars])
+ && !ISGRAPH (format_chars[nchars]))
+ ++nchars;
+
+ if (nchars > 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted non-graph characters in format");
+ return format_chars + nchars - 1;
+ }
+ if (nchars)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted non-graph character %qc in format",
+ *format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ return format_chars;
+}
+
+/* Diagnose tokens recorded in BALTOKS that are still open: at most one
+   leftover opening bracket, curly brace, parenthesis, or angle bracket
+   (tried in that order), a leftover quoting directive, and a leftover
+   single or double quote.  FORMAT_STRING_LOC is the location of the
+   format string, ORIG_FORMAT_CHARS points to its beginning, and
+   FORMAT_STRING_CST is the format string itself (as a tree).  */
+
+static void
+maybe_diag_unbalanced_tokens (location_t format_string_loc,
+                              const char *orig_format_chars,
+                              tree format_string_cst,
+                              baltoks_t &baltoks)
+{
+  /* The stacks of opening punctuators, in the order of precedence.  */
+  baltoks_t::balanced_tokens_t *const stacks[] =
+    {
+      &baltoks.brackets, &baltoks.curly, &baltoks.parens, &baltoks.pointy
+    };
+
+  /* Take the innermost entry of the first stack that is not empty.  */
+  const char *unbalanced = NULL;
+  for (unsigned i = 0; i != sizeof stacks / sizeof *stacks; ++i)
+    if (stacks[i]->length ())
+      {
+        unbalanced = stacks[i]->pop ();
+        break;
+      }
+
+  if (unbalanced)
+    format_warning_at_char (format_string_loc, format_string_cst,
+                            unbalanced - orig_format_chars + 1,
+                            OPT_Wformat_diag,
+                            "unbalanced punctuation character %<%c%> in format",
+                            *unbalanced);
+
+  if (baltoks.quotdirs.length ())
+    {
+      const char *quotdir = baltoks.quotdirs.pop ();
+      format_warning_at_char (format_string_loc, format_string_cst,
+                              quotdir - orig_format_chars,
+                              OPT_Wformat_,
+                              "unterminated quoting directive");
+    }
+
+  /* An open single quote takes precedence over an open double quote.  */
+  const char *quote = baltoks.singlequote;
+  if (!quote)
+    quote = baltoks.doublequote;
+
+  if (quote)
+    format_warning_at_char (format_string_loc, format_string_cst,
+                            quote - orig_format_chars + 1,
+                            OPT_Wformat_diag,
+                            "unterminated quote character %<%c%> in format",
+                            *quote);
+}
+
/* Do the main part of checking a call to a format function. FORMAT_CHARS
is the NUL-terminated format string (which at this point may contain
internal NUL characters); FORMAT_LENGTH is its length (excluding the
@@ -2816,8 +3775,10 @@ check_format_info_main (format_check_results *res,
and it didn't use $; 1 if $ formats are in use. */
int has_operand_number = -1;
- /* Vector of pointers to opening quoting directives (like GCC "%<"). */
- auto_vec<const char*> quotdirs;
+ /* Vectors of pointers to opening quoting directives (like GCC "%<"),
+ opening braces, brackets, and parentheses. Used to detect unbalanced
+ tokens. */
+ baltoks_t baltoks;
/* Pointers to the most recent color directives (like GCC's "%r or %R").
A starting color directive much be terminated before the end of
@@ -2828,10 +3789,26 @@ check_format_info_main (format_check_results *res,
init_dollar_format_checking (info->first_arg_num, first_fillin_param);
+ /* In GCC diagnostic functions check plain directives (substrings within
+ the format string that don't start with %) for quoting and punctuations
+ problems. */
+ bool ck_plain = (!info->is_raw
+ && (info->format_type == gcc_diag_format_type
+ || info->format_type == gcc_tdiag_format_type
+ || info->format_type == gcc_cdiag_format_type
+ || info->format_type == gcc_cxxdiag_format_type));
+
while (*format_chars != 0)
{
- if (*format_chars++ != '%')
+ if (ck_plain)
+ format_chars = check_plain (format_string_loc,
+ format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks);
+
+ if (*format_chars == 0 || *format_chars++ != '%')
continue;
+
if (*format_chars == 0)
{
format_warning_at_char (format_string_loc, format_string_cst,
@@ -2846,6 +3823,8 @@ check_format_info_main (format_check_results *res,
continue;
}
+ /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
+ to ARG_PARSER members may modify the variable. */
flag_chars_t flag_chars;
argument_parser arg_parser (info, format_chars, format_string_cst,
orig_format_chars, format_string_loc,
@@ -2908,7 +3887,7 @@ check_format_info_main (format_check_results *res,
flag_chars.validate (fki, fci, flag_specs, format_chars,
format_string_cst,
format_string_loc, orig_format_chars, format_char,
- quotdirs.length () > 0);
+ baltoks.quotdirs.length () > 0);
const int alloc_flag = flag_chars.get_alloc_flag (fki);
const bool suppressed = flag_chars.assignment_suppression_p (fki);
@@ -2920,17 +3899,17 @@ check_format_info_main (format_check_results *res,
if (quot_begin_p && !quot_end_p)
{
- if (quotdirs.length ())
+ if (baltoks.quotdirs.length ())
format_warning_at_char (format_string_loc, format_string_cst,
format_chars - orig_format_chars,
OPT_Wformat_,
"nested quoting directive");
- quotdirs.safe_push (format_chars);
+ baltoks.quotdirs.safe_push (format_chars);
}
else if (!quot_begin_p && quot_end_p)
{
- if (quotdirs.length ())
- quotdirs.pop ();
+ if (baltoks.quotdirs.length ())
+ baltoks.quotdirs.pop ();
else
format_warning_at_char (format_string_loc, format_string_cst,
format_chars - orig_format_chars,
@@ -2962,7 +3941,7 @@ check_format_info_main (format_check_results *res,
/* Diagnose directives that shouldn't appear in a quoted sequence.
(They are denoted by a double quote in FLAGS2.) */
- if (quotdirs.length ())
+ if (baltoks.quotdirs.length ())
{
if (strchr (fci->flags2, '"'))
format_warning_at_char (format_string_loc, format_string_cst,
@@ -3018,10 +3997,9 @@ check_format_info_main (format_check_results *res,
if (has_operand_number > 0)
finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);
- if (quotdirs.length ())
- format_warning_at_char (format_string_loc, format_string_cst,
- quotdirs.pop () - orig_format_chars,
- OPT_Wformat_, "unterminated quoting directive");
+ maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
+ format_string_cst, baltoks);
+
if (color_begin && !color_end)
format_warning_at_char (format_string_loc, format_string_cst,
color_begin - orig_format_chars,
@@ -4199,7 +5177,7 @@ handle_format_attribute (tree *node, tree atname, tree args,
if (arg_num != info.first_arg_num)
{
if (!(flags & (int) ATTR_FLAG_BUILT_IN))
- error ("args to be formatted is not %<...%>");
+ error ("argument to be formatted is not %<...%>");
*no_add_attrs = true;
return NULL_TREE;
}
diff --git a/gcc/testsuite/gcc.dg/format/gcc_diag-11.c b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c
new file mode 100644
index 00000000000..a976c7aa519
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c
@@ -0,0 +1,455 @@
+/* Test warnings for common punctuation, quoting, and spelling issues
+ in GCC diagnostics.
+ { dg-do compile }
+ { dg-options "-Wformat -Wformat-diag" } */
+
+/* Magic identifiers must be set before the attribute is used. */
+
+typedef long long __gcc_host_wide_int__;
+
+typedef struct location_s
+{
+ const char *file;
+ int line;
+} location_t;
+
+union tree_node;
+typedef union tree_node *tree;
+
+/* Define gimple as a dummy type. The typedef must be provided for
+ the C test to find the symbol. */
+typedef struct gimple gimple;
+
+/* Likewise for cgraph_node. */
+typedef struct cgraph_node cgraph_node;
+
+/* ATTR wraps its arguments in __attribute__.  FORMAT (KIND) expands to
+   a format attribute with the archetype __gcc_KIND__ whose format string
+   is argument 1 and whose first argument to check is argument 2.  */
+#define ATTR(...) __attribute__ ((__VA_ARGS__))
+#define FORMAT(kind) ATTR (format (__gcc_## kind ##__, 1, 2))
+
+/* Raw formatting functions like pp_format.  The archetype is spelled
+   both with and without the surrounding double underscores.  */
+void diag_raw (const char*, ...) ATTR (format (__gcc_diag_raw__, 1, 2));
+void cdiag_raw (const char*, ...) ATTR (format (__gcc_cdiag_raw__, 1, 2));
+void tdiag_raw (const char*, ...) ATTR (format (gcc_tdiag_raw, 1, 2));
+void cxxdiag_raw (const char*, ...) ATTR (format (gcc_cxxdiag_raw, 1, 2));
+
+/* Basic formatting function. */
+void diag (const char*, ...) FORMAT (diag);
+
+/* Diagnostic formatting function like error or warning declared
+ by the C front end. */
+void cdiag (const char*, ...) FORMAT (cdiag);
+
+/* Diagnostic formatting function like error or warning declared
+ by the middle-end or back-end. */
+void tdiag (const char*, ...) FORMAT (tdiag);
+
+/* Diagnostic formatting function like error or warning declared
+ by the C++ front-end. */
+void cxxdiag (const char*, ...) FORMAT (cxxdiag);
+
+
+/* Verify that functions declared with one of the raw attributes
+   (__gcc_diag_raw__, __gcc_cdiag_raw__, gcc_tdiag_raw or gcc_cxxdiag_raw)
+   are not subject to -Wformat-diag: none of the calls below expects
+   a warning.  */
+
+void test_diag_raw (tree t, gimple *gc)
+{
+ diag_raw ("a b");
+ diag_raw ("newline\n");
+ diag_raw ("lone period.");
+ diag_raw ("multiple punctuators: !!!");
+ diag_raw ("unbalanced paren (");
+ diag_raw ("keyword alignas and identifier_with_underscores");
+ diag_raw ("disable __builtin_abs with the -fno-builtin-abs option");
+ diag_raw ("who says I can't have no stinkin' contractions? ");
+
+ cdiag_raw ("__atomic_sync (%qE) == 7???", t);
+ tdiag_raw ("__builtin_abs (%E) < 0!?!", t);
+ cxxdiag_raw ("template <> int f (%E", t);
+}
+
+/* Verify that functions declared with the C front-end __gcc_cdiag__
+ attribute detect invalid whitespace in format strings. */
+
+void test_cdiag_whitespace (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ /* Verify that strings of leading spaces don't trigger a warning. */
+ cdiag (" a");
+ cdiag (" b");
+ cdiag (" c");
+ cdiag ("%< %>a");
+ cdiag ("%< %>a");
+ cdiag ("a b");
+ cdiag ("a b"); /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */
+ cdiag ("a "); /* { dg-warning "spurious trailing space" } */
+ cdiag ("a "); /* { dg-warning "spurious trailing space" } */
+ cdiag ("a%< %>");
+ cdiag ("a%< %>%< %>");
+ cdiag ("a%< %> "); /* { dg-warning "spurious trailing space" } */
+ cdiag ("a%< %> %< %>"); /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */
+
+ /* It's debatable whether the following two format strings should
+ be diagnosed. They aren't only because it's simpler that way. */
+ cdiag ("a %< %>");
+ cdiag ("a%< %> %< %>");
+
+ /* Exercise other whitespace characters. */
+ cdiag ("a\fb"); /* { dg-warning "unquoted whitespace character '\\\\x0c'" } */
+ cdiag ("a\nb"); /* { dg-warning "unquoted whitespace character '\\\\x0a'" } */
+ cdiag ("a\rb"); /* { dg-warning "unquoted whitespace character '\\\\x0d'" } */
+ cdiag ("a\vb"); /* { dg-warning "unquoted whitespace character '\\\\x0b'" } */
+
+ cdiag ("First sentence. And a next.");
+ cdiag ("First sentence. not capitalized sentence"); /* { dg-warning "inconsistent capitalization" } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+ /* Verify that the warning can be suppressed. */
+ cdiag ("\ta\b c\vb\n");
+
+#pragma GCC diagnostic pop
+}
+
+
+/* Verify that unquoted control characters in a format string are
+   diagnosed.  */
+
+void test_cdiag_control (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("\1"); /* { dg-warning "unquoted control character '\\\\x01'" } */
+ cdiag ("a\ab"); /* { dg-warning "unquoted control character '\\\\x07'" } */
+ cdiag ("a\bb"); /* { dg-warning "unquoted control character '\\\\x08'" } */
+}
+
+
+/* Verify the punctuation checks: spurious leading or trailing
+   punctuation, runs of consecutive punctuation characters, unbalanced
+   parentheses, unterminated quotes, and a space followed by punctuation,
+   along with the forms that are accepted without a warning.  */
+
+void test_cdiag_punct (tree t, gimple *gc, int i)
+{
+ (void)&t; (void)&gc;
+
+ /* Exercise the period. */
+ cdiag (".abc"); /* { dg-warning "spurious leading punctuation sequence .\.." } */
+ cdiag ("abc;"); /* { dg-warning "spurious trailing punctuation sequence .;." } */
+ /* Verify that sentences that start with an uppercase letter and end
+ in a period are not diagnosed. */
+ cdiag ("This is a full sentence.");
+ cdiag ("Capitalized sentence (with a parethetical note).");
+ cdiag ("Not a full sentence;"); /* { dg-warning "spurious trailing punctuation sequence .;." } */
+ cdiag ("Neither is this one,"); /* { dg-warning "spurious trailing punctuation sequence .,." } */
+
+ /* Exercise the ellipsis. */
+ cdiag ("this message...");
+ cdiag ("...continues here");
+ cdiag ("but...not here"); /* { dg-warning "unquoted sequence of 3 consecutive punctuation characters" } */
+
+ /* Verify that parenthesized sentences are accepted, even the whole
+ message (done in the C++ front end). */
+ cdiag ("null argument where non-null required (argument %i)", i);
+ cdiag ("null (argument %i) where non-null required", i);
+ cdiag ("(see what comes next)");
+
+ /* Verify that only a single trailing colon is accepted. */
+ cdiag ("candidates are:");
+ cdiag ("candidates are::"); /* { dg-warning "spurious trailing punctuation sequence .::." } */
+
+ /* Exercise C++. */
+ cdiag ("C++ is cool");
+ cdiag ("this is c++");
+ cdiag ("you can do this in C++ but not in C");
+
+ /* Also verify that G++ is accepted. */
+ cdiag ("G++ rocks");
+ cdiag ("this is accepted by g++");
+ cdiag ("valid in G++ (or g++) but not in gcc");
+
+ /* Exercise parenthetical note followed by a colon, semicolon,
+ or a comma. */
+ cdiag ("found a bug (here):");
+ cdiag ("because of another bug (over there); fix it");
+
+ cdiag ("found foo (123): go look at it");
+ cdiag ("missed bar (abc); will try harder next time");
+
+ cdiag ("expected this (or that), got something else (or who knows what)");
+
+ /* Exercise parenthetical note with a question mark. */
+ cdiag ("hmmm (did you really mean that?)");
+ cdiag ("error (did you mean %<foo()%>?)");
+ /* And a question mark after a parenthetical note. */
+ cdiag ("did you mean this (or that)?");
+
+ /* But make sure unbalanced parentheses are diagnosed. */
+ cdiag ("or this or the other)?"); /* { dg-warning "unbalanced punctuation character '\\\)'" } */
+
+ cdiag ("## Heading"); /* { dg-warning "spurious leading punctuation sequence .##." } */
+ cdiag ("## %s ##", "1"); /* { dg-warning "spurious (leading|trailing) punctuation sequence .##." } */
+
+ cdiag ("#1 priority"); /* { dg-warning "spurious leading punctuation sequence .#." } */
+ cdiag ("priority #2");
+
+ /* Quoting. */
+ cdiag ("\"quoted\"");
+ cdiag ("\"quoted\" string");
+ cdiag ("this is a \"string in quotes\"");
+ cdiag ("\"missing closing quote"); /* { dg-warning "unterminated quote character '\"'" } */
+
+ /* PR translation/90121 - punctuation character after a space. */
+ cdiag ("bad version : 1"); /* { dg-warning "space followed by punctuation character ':'" } */
+ cdiag ("problem ; fix it"); /* { dg-warning "space followed by punctuation character ';'" } */
+ cdiag ("End . not."); /* { dg-warning "space followed by punctuation character '.'" } */
+ cdiag ("it is bad , very bad"); /* { dg-warning "space followed by punctuation character ','" } */
+ cdiag ("say what ?"); /* { dg-warning "space followed by punctuation character '?'" } */
+
+ /* But these are okay after a space. But should they be? */
+ cdiag ("1 / 2");
+ cdiag ("2 + 3");
+ cdiag ("2 - 3");
+}
+
+/* Verify the handling of <, >, and parentheses that are unbalanced or
+   that form a spurious leading, trailing, or embedded punctuation
+   sequence.  */
+
+void test_cdiag_punct_balance (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ /* Less-than and greater than. */
+ cdiag ("a < b"); /* { dg-warning "unbalanced punctuation character '<' in format" } */
+ cdiag ("must be > 0"); /* { dg-warning "unbalanced punctuation character '>' in format" } */
+
+ cdiag ("f()"); /* { dg-warning "spurious trailing punctuation sequence .\\\(\\\)." } */
+ cdiag ("g(1)");
+ cdiag ("("); /* { dg-warning "spurious leading punctuation character|unbalanced" } */
+ cdiag ("()"); /* { dg-warning "spurious leading punctuation sequence" } */
+ cdiag (")"); /* { dg-warning "unbalanced punctuation character '\\\)'" } */
+ cdiag ("f()g"); /* { dg-warning "unquoted sequence of 2 consecutive punctuation characters" } */
+ cdiag ("illegal operand (1)");
+}
+
+
+/* Verify that unquoted non-graph characters (here the bytes 0xfe and
+   0xff) are diagnosed.  */
+
+void test_cdiag_nongraph (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("a\376b"); /* { dg-warning "unquoted non-graph character '\\\\xfe'" } */
+ cdiag ("a\377b"); /* { dg-warning "unquoted non-graph character '\\\\xff'" } */
+}
+
+
+/* Verify that an unquoted __attribute or __attribute__ is diagnosed,
+   together with any unbalanced parenthesis that follows it, while the
+   plain word "attribute" is accepted.  */
+
+void test_cdiag_attribute (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("attribute foo");
+ cdiag ("this is attribute bar");
+ cdiag ("bad __attribute bar"); /* { dg-warning "unquoted attribute" } */
+ cdiag ("__attribute__ (foobar) bad"); /* { dg-warning "unquoted attribute" } */
+ cdiag ("__attribute__ ((foobar))"); /* { dg-warning "unquoted attribute" } */
+ cdiag ("__attribute__ (xxx))"); /* { dg-warning "unquoted attribute" } */
+ /* { dg-warning "unbalanced punctuation character '\\\)'" "xxx" { target *-*-* } .-1 } */
+ cdiag ("__attribute__ ((yyy)))"); /* { dg-warning "unquoted attribute" } */
+ /* { dg-warning "unbalanced punctuation character '\\\)'" "yyy" { target *-*-* } .-1 } */
+ cdiag ("__attribute__ ((zzz)"); /* { dg-warning "unquoted attribute" } */
+ /* { dg-warning "unbalanced punctuation character '\\\('" "zzz" { target *-*-* } .-1 } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+ /* Verify that the warning can be suppressed. */
+ cdiag ("__attribute__ (((");
+
+#pragma GCC diagnostic pop
+}
+
+/* Verify that unquoted names starting with __builtin_ are diagnosed as
+   names of built-in functions, including __builtin_xyz, which is
+   presumably not an actual built-in.  */
+
+void test_cdiag_builtin (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("__builtin_abort"); /* { dg-warning "unquoted name of built-in function '__builtin_abort'" } */
+ cdiag ("in __builtin_trap"); /* { dg-warning "unquoted name of built-in function '__builtin_trap'" } */
+ cdiag ("__builtin_xyz bites");/* { dg-warning "unquoted name of built-in function '__builtin_xyz'" } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+ /* Verify that the warning can be suppressed. */
+ cdiag ("__builtin____with____lots__of__underscores");
+
+#pragma GCC diagnostic pop
+}
+
+
+/* Verify that unquoted option names are diagnosed, and that quoted
+   options, a negative number, and a dash in the middle of a word are
+   not.  */
+
+void test_cdiag_option (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("%<-Wall%>");
+ cdiag ("use option %<-Wextra%> to enable additinal warnings");
+
+ cdiag ("-O2 is fast"); /* { dg-warning "unquoted option name '-O2'" } */
+ cdiag ("but -O3 is faster"); /* { dg-warning "unquoted option name '-O3'" } */
+
+ cdiag ("get --help"); /* { dg-warning "unquoted option name '--help'" } */
+ cdiag ("enable -m32"); /* { dg-warning "unquoted option name '-m32'" } */
+ cdiag ("value is -12");
+ cdiag ("foo-O2");
+ cdiag ("a-W");
+}
+
+
+/* Verify that unquoted keywords and preprocessing directives are
+   diagnosed, and that words which merely contain or resemble them are
+   not.  */
+
+void test_cdiag_keyword (tree t, gimple *gc)
+{
+ cdiag ("alignasi");
+ cdiag ("malignofer or alignofus");
+ cdiag ("use alignof"); /* { dg-warning "unquoted keyword 'alignof'" } */
+ cdiag ("or _Alignof"); /* { dg-warning " keyword '_Alignof'" } */
+ cdiag ("_Pragma too"); /* { dg-warning " keyword '_Pragma'" } */
+
+ cdiag ("a #error directive"); /* { dg-warning "unquoted preprocessing directive '#error'" } */
+ cdiag ("#include file"); /* { dg-warning "unquoted preprocessing directive '#include'" } */
+ cdiag ("but #pragma foobar"); /* { dg-warning "unquoted preprocessing directive '#pragma'" } */
+ cdiag ("pragma foobar is okay");
+ cdiag ("or even # pragma is fine");
+
+ /* Exercise qualifiers. */
+ cdiag ("const function");
+ cdiag ("const-qualified variable"); /* { dg-warning "unquoted keyword 'const-qualified'" } */
+ /* { dg-message "use '%<const%>-qualified' instead" "const-qualified" { target *-*-* } .-1 } */
+ cdiag ("a const %qD", t); /* { dg-warning "unquoted keyword 'const'" } */
+ cdiag ("restrict %qE", t); /* { dg-warning "unquoted keyword 'restrict'" } */
+ cdiag ("volatile %qT", t); /* { dg-warning "unquoted keyword 'volatile'" } */
+ cdiag ("const %qD and restrict %qE or volatile %qT", t, t, t);
+ /* { dg-warning "unquoted keyword 'const'" "" { target *-*-* } .-1 } */
+ /* { dg-warning "unquoted keyword 'restrict'" "" { target *-*-* } .-2 } */
+ /* { dg-warning "unquoted keyword 'volatile'" "" { target *-*-* } .-3 } */
+
+ cdiag ("an offsetof here"); /* { dg-warning "unquoted keyword 'offsetof" } */
+ cdiag ("sizeof x"); /* { dg-warning "unquoted keyword 'sizeof" } */
+ cdiag ("have typeof"); /* { dg-warning "unquoted keyword 'typeof" } */
+
+ /* The following use C keywords as ordinary English words; no warning
+ is expected for them. */
+ cdiag ("break rules");
+ cdiag ("if we continue by default for a short while else do nothing");
+ cdiag ("register a function for unsigned extern to void const reads");
+ cdiag ("or volatile access");
+}
+
+
+/* Verify that unquoted multi-character operators are diagnosed.  */
+
+void test_cdiag_operator (tree t, gimple *gc)
+{
+ cdiag ("x != 0"); /* { dg-warning "unquoted operator '!='" } */
+ cdiag ("logical &&"); /* { dg-warning "unquoted operator '&&" } */
+ cdiag ("+= operator"); /* { dg-warning "unquoted operator '\\\+=" } */
+ cdiag ("a == b"); /* { dg-warning "unquoted operator '=='" } */
+ cdiag ("++a"); /* { dg-warning "unquoted operator '\\\+\\\+'" } */
+ cdiag ("b--"); /* { dg-warning "unquoted operator '--'" } */
+ cdiag ("1 << 2"); /* { dg-warning "unquoted operator '<<'" } */
+ cdiag (">> here <<"); /* { dg-warning "unquoted operator '>>|<<'" } */
+}
+
+
+/* Verify that unquoted type keywords and multi-word type names are
+   diagnosed, while ordinary words that merely start with or contain
+   them are not.  */
+
+void test_cdiag_type_name (tree t, gimple *gc)
+{
+ cdiag ("the word character should not be quoted");
+ cdiag ("but char should be"); /* { dg-warning "unquoted keyword 'char'" } */
+
+ cdiag ("unsigned char should be quoted"); /* { dg-warning "unquoted type name 'unsigned char'" } */
+ cdiag ("but unsigned character is fine");
+
+ cdiag ("as should int"); /* { dg-warning "unquoted keyword 'int'" } */
+ cdiag ("and signed int"); /* { dg-warning "unquoted type name 'signed int'" } */
+ cdiag ("and also unsigned int"); /* { dg-warning "unquoted type name 'unsigned int'" } */
+ cdiag ("very long thing");
+ cdiag ("use long long here"); /* { dg-warning "unquoted type name 'long long'" } */
+
+ cdiag ("have a floating type");
+ cdiag ("found float type"); /* { dg-warning "unquoted keyword 'float'" } */
+
+ cdiag ("wchar_t is wide"); /* { dg-warning "unquoted identifier or keyword 'wchar_t'" } */
+}
+
+
+/* Verify that unquoted words containing an underscore are diagnosed as
+   an identifier or keyword, and that quoted ones are accepted.  */
+
+void test_cdiag_identifier (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("private _x ident"); /* { dg-warning "unquoted identifier or keyword '_x'" } */
+ cdiag ("and another __y"); /* { dg-warning "unquoted identifier or keyword '__y'" } */
+ cdiag ("ident z_ with trailing underscore"); /* { dg-warning "unquoted identifier or keyword 'z_'" } */
+ cdiag ("v_ variable"); /* { dg-warning "unquoted identifier or keyword 'v_'" } */
+ cdiag ("call foo_bar"); /* { dg-warning "unquoted identifier or keyword 'foo_bar'" } */
+ cdiag ("unqoted x_y ident"); /* { dg-warning "unquoted identifier or keyword 'x_y'" } */
+
+ cdiag ("size_t type"); /* { dg-warning "unquoted identifier or keyword 'size_t'" } */
+ cdiag ("bigger than INT_MAX");/* { dg-warning "unquoted identifier or keyword 'INT_MAX'" } */
+
+ cdiag ("quoted ident %<a_b%>");
+ cdiag ("another quoted identifier %<x_%> here");
+}
+
+
+/* Verify the diagnostics for contractions, bare apostrophes, and
+   misspelled or abbreviated terms, and that words which merely end in
+   an abbreviation are accepted.  */
+
+void test_cdiag_bad_words (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cdiag ("aren't you dumb?"); /* { dg-warning "bare apostrophe ''' in format" } */
+ cdiag ("bitfields suck"); /* { dg-warning "misspelled term 'bitfields' in format; use 'bit-fields' instead" } */
+ cdiag ("invalid bitfield"); /* { dg-warning "misspelled term 'bitfield' in format; use 'bit-field' instead" } */
+ cdiag ("bad builtin function"); /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+ cdiag ("bad builtin function"); /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+ cdiag ("builtin function x"); /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+ cdiag ("builtin functions disabled"); /* { dg-warning "misspelled term 'builtin functions' in format; use 'built-in functions' instead" } */
+ cdiag ("enable builtin functions"); /* { dg-warning "misspelled term 'builtin functions' in format; use 'built-in functions' instead" } */
+ cdiag ("you can't do that"); /* { dg-warning "contraction 'can't' in format" } */
+ cdiag ("you can%'t do that");/* { dg-warning "contraction 'can%'t' in format" } */
+ cdiag ("Can%'t touch this.");/* { dg-warning "contraction 'Can%'t' in format" } */
+ cdiag ("on the commandline");/* { dg-warning "misspelled term 'commandline' in format; use 'command line' instead" } */
+ cdiag ("command line option");/* { dg-warning "misspelled term 'command line option' in format; use 'command-line option' instead" } */
+ cdiag ("it mustn't be"); /* { dg-warning "contraction 'mustn't' in format" } */
+ cdiag ("isn't that silly?"); /* { dg-warning "bare apostrophe ''' in format" } */
+
+ cdiag ("can not do this"); /* { dg-warning "misspelled term 'can not' in format; use 'cannot' instead" } */
+ cdiag ("you can not"); /* { dg-warning "misspelled term 'can not' in format; use 'cannot' instead" } */
+
+ /* See PR target/90157 - aarch64: unnecessary abbreviation in diagnostic */
+ cdiag ("Mising arg."); /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+ cdiag ("2 args: a and b"); /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+ cdiag ("arg 1"); /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+ cdiag ("Args are wrong."); /* { dg-warning "misspelled term 'Args' in format; use 'arguments' instead" } */
+ cdiag ("bad arg"); /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+ cdiag ("two args"); /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+ cdiag ("args 1 and 2"); /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+
+ cdiag ("Reg A"); /* { dg-warning "misspelled term 'Reg' in format; use 'register' instead" } */
+ cdiag ("regs A and B"); /* { dg-warning "misspelled term 'regs' in format; use 'registers' instead" } */
+ cdiag ("no regs"); /* { dg-warning "misspelled term 'regs' in format; use 'registers' instead" } */
+
+ /* Verify words that end in "arg" and "args" or "reg" and "regs" are
+ not diagnosed. */
+ cdiag ("gulmarg and balfarg");
+ cdiag ("ademargs or toshargs");
+ cdiag ("talk to Greg");
+ cdiag ("prepreg is a fabric");
+ cdiag ("there are dregs in my wine");
+}
+
+
+/* Verify that a quoted %s directive is diagnosed and that paired
+   punctuation characters enclosed in %< and %> are not.  */
+
+void test_cdiag_directive (tree t, gimple *gc)
+{
+ (void)&t; (void)&gc;
+
+ cxxdiag ("%<%s%>", ""); /* { dg-warning "quoted '%s' directive in format" } */
+ /* This was asked to be diagnosed in PR #90158 but there, the \"%s\"
+ is in parentheses which ends up getting diagnosed because of
+ the two consecutive punctuation characters, ( and ". */
+ cdiag ("\"%s\"", ""); /* { dg-warning "quoted '%s' directive in format" } */
+
+ /* Make sure quoted paired tokens are not diagnosed. */
+ cdiag ("%<'%>");
+ cdiag ("%<\"%>");
+ cdiag ("%<<%>");
+ cdiag ("%<>%>");
+ cdiag ("%<(%>");
+ cdiag ("%<)%>");
+ cdiag ("%<[%>");
+ cdiag ("%<]%>");
+
+ cdiag ("%<'%> %<\"%> %<>%> %<<%> %<)%> %<(%> %<]%> %<[%>");
+}