This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] implement -Wformat-diag, v2


Attached is a revised implementation of the -Wformat-diag checker
incorporating the feedback I got on the first revision.

Martin
gcc/c-family/ChangeLog:

	* c-format.c (function_format_info::format_type): Adjust type.
	(function_format_info::is_raw): New member.
	(decode_format_type): Adjust signature.  Handle "raw" diag attributes.
	(decode_format_attr): Adjust call to decode_format_type.
	Avoid a redundant call to convert_format_name_to_system_name.
	Avoid abbreviating the word "arguments" in a diagnostic.
	(format_warning_substr): New function.
	(avoid_dollar_number): Quote dollar sign in a diagnostic.
	(finish_dollar_format_checking): Same.
	(check_format_info): Same.
	(struct baltoks_t): New.
	(c_opers, c_keywords, cxx_keywords, badwords, contrs): New arrays.
	(maybe_diag_unbalanced_tokens, check_tokens, check_plain): New
	functions.
	(check_format_info_main): Call check_plain.  Use baltoks_t.  Call
	maybe_diag_unbalanced_tokens.
	(handle_format_attribute): Spell out the word "arguments" in
	a diagnostic.

gcc/testsuite/ChangeLog:
	* gcc.dg/format/gcc_diag-11.c: New test.

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index a7f76c1c01d..713fce442c9 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -52,7 +52,13 @@ enum format_type { printf_format_type, asm_fprintf_format_type,
 
 struct function_format_info
 {
-  int format_type;			/* type of format (printf, scanf, etc.) */
+  enum format_type format_type;		/* type of format (printf, scanf, etc.) */
+  /* IS_RAW is relevant only for GCC diagnostic format functions.
+     It is set for "raw" formatting functions like pp_printf that
+     are not intended to produce complete diagnostics according to
+     GCC guidelines, and clear for others like error and warning
+     whose format string is checked for proper quoting and spelling.  */
+  bool is_raw;
   unsigned HOST_WIDE_INT format_num;	/* number of format argument */
   unsigned HOST_WIDE_INT first_arg_num;	/* number of first arg (zero for varargs) */
 };
@@ -65,7 +71,7 @@ static GTY(()) tree locus;
 
 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
 				bool);
-static int decode_format_type (const char *);
+static format_type decode_format_type (const char *, bool * = NULL);
 
 static bool check_format_string (const_tree argument,
 				 unsigned HOST_WIDE_INT format_num,
@@ -111,6 +117,32 @@ format_warning_at_char (location_t fmt_string_loc, tree format_string_cst,
   return warned;
 }
 
+
+/* Emit a warning as per format_warning_va, but construct the substring_loc
+   for the substring at offsets [POS1, POS2 - 1] within a string constant
+   FORMAT_STRING_CST at FMT_STRING_LOC.  Return emit_warning_va's result.  */
+
+ATTRIBUTE_GCC_DIAG (6,7)
+static bool
+format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
+		       int pos1, int pos2, int opt, const char *gmsgid, ...)
+{
+  va_list ap;
+  va_start (ap, gmsgid);
+  tree string_type = TREE_TYPE (format_string_cst);
+
+  pos2 -= 1;   /* POS2 is one past the end; make it the last offset.  */
+
+  substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2);
+  format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
+				   NULL);
+  bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
+  va_end (ap);
+
+  return warned;
+}
+
+
 /* Check that we have a pointer to a string suitable for use as a format.
    The default is to check for a char type.
    For objective-c dialects, this is extended to include references to string
@@ -320,10 +352,8 @@ decode_format_attr (const_tree fntype, tree atname, tree args,
     {
       const char *p = IDENTIFIER_POINTER (format_type_id);
 
-      p = convert_format_name_to_system_name (p);
+      info->format_type = decode_format_type (p, &info->is_raw);
 
-      info->format_type = decode_format_type (p);
-      
       if (!c_dialect_objc ()
 	   && info->format_type == gcc_objc_string_format_type)
 	{
@@ -359,7 +389,7 @@ decode_format_attr (const_tree fntype, tree atname, tree args,
   if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
     {
       gcc_assert (!validated_p);
-      error ("format string argument follows the args to be formatted");
+      error ("format string argument follows the arguments to be formatted");
       return false;
     }
 
@@ -1067,27 +1097,55 @@ static void format_type_warning (const substring_loc &fmt_loc,
 				 char conversion_char);
 
 /* Decode a format type from a string, returning the type, or
-   format_type_error if not valid, in which case the caller should print an
-   error message.  */
-static int
-decode_format_type (const char *s)
+   format_type_error if not valid, in which case the caller should
+   print an error message.  On success, when IS_RAW is non-null, set
+   *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
+   formatting function and clear it otherwise.  */
+static format_type
+decode_format_type (const char *s, bool *is_raw /* = NULL */)
 {
-  int i;
-  int slen;
+  bool is_raw_buf;
+
+  if (!is_raw)
+    is_raw = &is_raw_buf;
+
+  *is_raw = false;
 
   s = convert_format_name_to_system_name (s);
-  slen = strlen (s);
-  for (i = 0; i < n_format_types; i++)
+
+  size_t slen = strlen (s);
+  for (int i = 0; i < n_format_types; i++)
     {
-      int alen;
+      /* Check for a match with no underscores.  */
       if (!strcmp (s, format_types[i].name))
-	return i;
-      alen = strlen (format_types[i].name);
+	return static_cast<format_type> (i);
+
+      /* Check for leading and trailing underscores.  */
+      size_t alen = strlen (format_types[i].name);
       if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
 	  && s[slen - 1] == '_' && s[slen - 2] == '_'
 	  && !strncmp (s + 2, format_types[i].name, alen))
-	return i;
+	return static_cast<format_type>(i);
+
+      /* Check for the "_raw" suffix and no leading underscores.  */
+      if (slen == alen + 4
+	  && !strncmp (s, format_types[i].name, alen)
+	  && !strcmp (s + alen, "_raw"))
+	{
+	  *is_raw = true;
+	  return static_cast<format_type>(i);
+	}
+
+      /* Check for the "_raw__" suffix and leading underscores.  */
+      if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
+	  && !strncmp (s + 2, format_types[i].name, alen)
+	  && !strcmp (s + 2 + alen, "_raw__"))
+	{
+	  *is_raw = true;
+	  return static_cast<format_type>(i);
+	}
     }
+
   return format_type_error;
 }
 
@@ -1350,7 +1408,8 @@ avoid_dollar_number (const char *format)
     format++;
   if (*format == '$')
     {
-      warning (OPT_Wformat_, "$ operand number used after format without operand number");
+      warning (OPT_Wformat_,
+	       "%<$%>operand number used after format without operand number");
       return true;
     }
   return false;
@@ -1381,7 +1440,8 @@ finish_dollar_format_checking (format_check_results *res, int pointer_gap_ok)
 	    found_pointer_gap = true;
 	  else
 	    warning_at (res->format_string_loc, OPT_Wformat_,
-			"format argument %d unused before used argument %d in $-style format",
+			"format argument %d unused before used argument %d "
+			"in %<$%>-style format",
 			i + 1, dollar_max_arg_used);
 	}
     }
@@ -1525,7 +1585,8 @@ check_format_info (function_format_info *info, tree params,
     }
   if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
       && res.number_other == 0)
-    warning_at (loc, OPT_Wformat_extra_args, "unused arguments in $-style format");
+    warning_at (loc, OPT_Wformat_extra_args,
+		"unused arguments in %<$%>-style format");
   if (res.number_empty > 0 && res.number_non_literal == 0
       && res.number_other == 0)
     warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
@@ -2789,6 +2850,904 @@ check_argument_type (const format_char_info *fci,
   return true;
 }
 
+/* Describes "paired tokens" within the format string that are
+   expected to be balanced.  */
+
+struct baltoks_t
+{
+  baltoks_t (): singlequote (), doublequote () { }
+
+  typedef auto_vec<const char *> balanced_tokens_t;
+  /* Vectors of pointers to opening square brackets ('['), curly
+     brackets ('{'), quoting directives (like GCC "%<"), parentheses,
+     and angle brackets ('<').  Used to detect unbalanced tokens.  */
+  balanced_tokens_t brackets;
+  balanced_tokens_t curly;
+  balanced_tokens_t quotdirs;
+  balanced_tokens_t parens;
+  balanced_tokens_t pointy;
+  /* Pointers to the last opening single and double quote (or null).  */
+  const char *singlequote;
+  const char *doublequote;
+};
+
+/* Describes a keyword, operator, or other name.  */
+
+struct token_t
+{
+  const char *name;   /* Keyword/operator name.  */
+  unsigned char len;  /* Its length.  */
+  const char *alt;    /* Alternate spelling.  */
+};
+
+/* Helper for initializing global token_t arrays below.  */
+#define NAME(name) { name, sizeof name - 1, NULL }
+
+/* C/C++ operators that are expected to be quoted within the format
+   string.  */
+
+static const token_t c_opers[] =
+  {
+   NAME ("!="), NAME ("%="),  NAME ("&&"),  NAME ("&="), NAME ("*="),
+   NAME ("++"), NAME ("+="),  NAME ("--"),  NAME ("-="), NAME ("->"),
+   NAME ("/="), NAME ("<<"),  NAME ("<<="), NAME ("<="), NAME ("=="),
+   NAME (">="), NAME (">>="), NAME (">>"),  NAME ("?:"),  NAME ("^="),
+   NAME ("|="), NAME ("||")
+  };
+
+static const token_t cxx_opers[] =
+  {
+   NAME ("->*"), NAME (".*"),  NAME ("::"),  NAME ("<=>")
+  };
+
+/* Common C/C++ keywords that are expected to be quoted within the format
+   string.  Keywords like auto, inline, or volatile are excluded because
+   they are sometimes used in common terms like /auto variables/, /inline
+   function/, or /volatile access/ where they should not be quoted.  */
+
+static const token_t c_keywords[] =
+  {
+#undef NAME
+#define NAME(name, alt)  { name, sizeof name - 1, alt }
+
+   NAME ("alignas", NULL),
+   NAME ("alignof", NULL),
+   NAME ("asm", NULL),
+   NAME ("bool", NULL),
+   NAME ("char", NULL),
+   NAME ("const %", NULL),
+   NAME ("const-qualified", "%<const%>-qualified"),
+   NAME ("float", NULL),
+   NAME ("ifunc", NULL),
+   NAME ("int", NULL),
+   NAME ("long double", NULL),
+   NAME ("long int", NULL),
+   NAME ("long long", NULL),
+   NAME ("malloc", NULL),
+   NAME ("noclone", NULL),
+   NAME ("noinline", NULL),
+   NAME ("nonnull", NULL),
+   NAME ("noreturn", NULL),
+   NAME ("nothrow", NULL),
+   NAME ("offsetof", NULL),
+   NAME ("readonly", "read-only"),
+   NAME ("readwrite", "read-write"),
+   NAME ("restrict %", NULL),
+   NAME ("restrict-qualified", "%<restrict%>-qualified"),
+   NAME ("short int", NULL),
+   NAME ("signed char", NULL),
+   NAME ("signed int", NULL),
+   NAME ("signed long", NULL),
+   NAME ("signed short", NULL),
+   NAME ("sizeof", NULL),
+   NAME ("typeof", NULL),
+   NAME ("unsigned char", NULL),
+   NAME ("unsigned int", NULL),
+   NAME ("unsigned long", NULL),
+   NAME ("unsigned short", NULL),
+   NAME ("volatile %", NULL),
+   NAME ("volatile-qualified", "%<volatile%>-qualified"),
+   NAME ("weakref", NULL),
+  };
+
+static const token_t cxx_keywords[] =
+  {
+   /* C++ only keywords and operators.  */
+   NAME ("catch", NULL),
+   NAME ("constexpr if", NULL),
+   NAME ("constexpr", NULL),
+   NAME ("consteval", NULL),
+   NAME ("decltype", NULL),
+   NAME ("nullptr", NULL),
+   NAME ("operator delete", NULL),
+   NAME ("operator new", NULL),
+   NAME ("typeid", NULL),
+   NAME ("typeinfo", NULL)
+  };
+
+/* Blacklisted words such as misspellings that should be avoided in favor
+   of the specified alternatives.  */
+static const struct
+{
+  const char *name;   /* Bad word or contraction.  */
+  unsigned char len;  /* Its length.  */
+  const char *alt;    /* Preferred alternative.  */
+} badwords[] =
+  {
+   NAME ("arg", "argument"),
+   NAME ("bitfield", "bit-field"),
+   NAME ("builtin function", "built-in function"),
+   NAME ("can not", "cannot"),
+   NAME ("commandline option", "command-line option"),
+   NAME ("commandline", "command line"),
+   NAME ("command line option", "command-line option"),
+   NAME ("decl", "declaration"),
+   NAME ("enumeral", "enumerated"),
+   NAME ("floating point", "floating-point"),
+   NAME ("non-zero", "nonzero"),
+   NAME ("reg", "register"),
+   NAME ("stmt", "statement"),
+  };
+
+/* Common contractions that should be avoided in favor of the specified
+   alternatives.  */
+
+static const struct
+{
+  const char *name;   /* Bad word or contraction.  */
+  unsigned char len;  /* Its length.  */
+  const char *alt;    /* Preferred alternative.  */
+} contrs[] =
+  {
+   NAME ("can't", "cannot"),
+   NAME ("didn't", "did not"),
+   /* These are commonly abused.  Avoid diagnosing them for now.
+      NAME ("isn't", "is not"),
+      NAME ("don't", "do not"),
+   */
+   NAME ("mustn't", "must not"),
+   NAME ("needn't", "need not"),
+   NAME ("shouldn't", "should not"),
+   NAME ("that's", "that is"),
+   NAME ("there's", "there is"),
+   NAME ("they're", "they are"),
+   NAME ("what's", "what is"),
+   NAME ("won't", "will not")
+  };
+
+/* Check for unquoted TOKENS, an array of NTOKS elements.  FORMAT_STRING_LOC
+   is the location of the format string, FORMAT_STRING_CST the format
+   string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
+   pointers to the beginning of the format string and the character
+   currently being processed, and BALTOKS describes paired "tokens"
+   within the format string that are expected to be balanced.
+   Returns a pointer to the last processed character or null when
+   nothing was done.  */
+
+static const char*
+check_tokens (const token_t *tokens, unsigned ntoks,
+	      location_t format_string_loc, tree format_string_cst,
+	      const char *orig_format_chars, const char *format_chars,
+	      baltoks_t &baltoks)
+{
+  /* For brevity.  */
+  const int opt = OPT_Wformat_diag;
+  /* Zero-based starting position of a problem sequence.  */
+  int fmtchrpos = format_chars - orig_format_chars;
+
+  /* For identifier-like "words," set to the word length.  */
+  unsigned wlen = 0;
+  /* Set for an operator, clear for an identifier/word.  */
+  bool is_oper = false;
+  bool underscore = false;   /* Set when the word contains a '_'.  */
+
+  if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
+    {
+      while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
+	{
+	  underscore |= format_chars[wlen] == '_';
+	  ++wlen;
+	}
+    }
+  else
+    is_oper = true;
+
+  for (unsigned i = 0; i != ntoks; ++i)
+    {
+      unsigned toklen = tokens[i].len;
+
+      if (toklen < wlen
+	  || strncmp (format_chars, tokens[i].name, toklen))
+	continue;
+
+      if (toklen == 2 && format_chars[0] == '+' && format_chars[1] == '+'
+	  && format_chars - orig_format_chars > 0
+	  && (TOUPPER (format_chars[-1]) == 'C'
+	      || TOUPPER (format_chars[-1]) == 'G'))
+	return format_chars + toklen - 1;   /* Reference to C++ or G++.  */
+
+      if (ISPUNCT (format_chars[toklen - 1]))
+	{
+	  if (format_chars[toklen - 1] == format_chars[toklen])
+	    return NULL;   /* Operator followed by another punctuator.  */
+	}
+      else if (ISALNUM (format_chars[toklen]))
+	return NULL;   /* Keyword prefix for a longer word.  */
+
+      if (toklen == 2
+	  && format_chars[0] == '-'
+	  && format_chars[1] == '-'
+	  && ISALNUM (format_chars[2]))
+	return NULL;   /* Probably option like --help.  */
+
+      /* Allow this ugly warning for the time being.  */
+      if (toklen == 2
+	  && format_chars - orig_format_chars > 6
+	  && !strncmp (format_chars - 7, " count >= width of ", 19))
+	return format_chars + 10;
+
+      /* The token is a type if it has a space and ends in a letter.  */
+      bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
+		      && strchr (tokens[i].name, ' '));
+
+      /* Backtrack to the last alphabetic character (for tokens whose
+	 names end in '%').  */
+      if (!is_oper)
+	while (!ISALPHA (tokens[i].name[toklen - 1]))
+	  --toklen;
+
+      if (format_warning_substr (format_string_loc, format_string_cst,
+				 fmtchrpos, fmtchrpos + toklen, opt,
+				 (is_type
+				  ? G_("unquoted type name %<%.*s%> in format")
+				  : (is_oper
+				     ? G_("unquoted operator %<%.*s%> in format")
+				     : G_("unquoted keyword %<%.*s%> in format"))),
+				 toklen, format_chars)
+	  && tokens[i].alt)
+	inform (format_string_loc, "use %qs instead", tokens[i].alt);
+
+      return format_chars + toklen - 1;
+    }
+
+  /* Diagnose unquoted __attribute__.  Consider any parenthesized
+     argument to the attribute to avoid redundant warnings for
+     the double parentheses that might follow.  */
+  if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1))
+    {
+      unsigned nchars = sizeof "__attribute" - 1;
+      while ('_' == format_chars[nchars])
+	++nchars;
+
+      for (int i = nchars; format_chars[i]; ++i)
+	if (' ' != format_chars[i])
+	  {
+	    nchars = i;
+	    break;
+	  }
+
+      if (format_chars[nchars] == '(')
+	{
+	  baltoks.parens.safe_push (format_chars + nchars);
+
+	  ++nchars;
+	  bool close = false;
+	  if (format_chars[nchars] == '(')
+	    {
+	      baltoks.parens.safe_push (format_chars + nchars);
+	      close = true;
+	      ++nchars;
+	    }
+	  for (int i = nchars; format_chars[i]; ++i)
+	    if (')' == format_chars[i])
+	      {
+		if (baltoks.parens.length () > 0)
+		  baltoks.parens.pop ();
+		nchars = i + 1;
+		break;
+	      }
+
+	  if (close && format_chars[nchars] == ')')
+	    {
+	      if (baltoks.parens.length () > 0)
+		baltoks.parens.pop ();
+	      ++nchars;
+	    }
+	}
+
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted attribute in format");
+      return format_chars + nchars - 1;
+    }
+
+  /* Diagnose unquoted built-ins.  */
+  if (format_chars[0] == '_'
+      && format_chars[1] == '_'
+      && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1)
+	  || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1)
+	  || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1)))
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + wlen, opt,
+			     "unquoted name of built-in function %<%.*s%> "
+			     "in format",
+			     wlen, format_chars);
+      return format_chars + wlen - 1;
+    }
+
+  /* Diagnose unquoted substrings of alphanumeric characters containing
+     underscores.  They most likely refer to identifiers and should be
+     quoted.  */
+  if (underscore)
+    format_warning_substr (format_string_loc, format_string_cst,
+			   format_chars - orig_format_chars,
+			   format_chars + wlen - orig_format_chars,
+			   opt,
+			   "unquoted identifier or keyword %<%.*s%> in format",
+			   wlen, format_chars);
+  else
+    {
+      /* Diagnose some common misspellings.  */
+      for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
+	{
+	  unsigned badwlen = strcspn (badwords[i].name, " -");
+	  if (wlen >= badwlen
+	      && (wlen <= badwords[i].len
+		  || (wlen == badwords[i].len + 1U
+		      && TOUPPER (format_chars[wlen - 1]) == 'S'))
+	      && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
+	    {
+	      /* Handle singular as well as plural forms of all bad words
+		 even though the latter don't necessarily make sense for
+		 all of the former (like "can nots").  */
+	      badwlen = badwords[i].len;
+	      const char *plural = "";
+	      if (TOUPPER (format_chars[badwlen]) == 'S')
+		{
+		  ++badwlen;
+		  plural = "s";
+		}
+
+	      format_warning_substr (format_string_loc, format_string_cst,
+				     fmtchrpos, fmtchrpos + badwords[i].len,
+				     opt,
+				     "misspelled term %<%.*s%> in format; "
+				     "use %<%s%s%> instead",
+				     badwlen, format_chars,
+				     badwords[i].alt, plural);
+
+	      return format_chars + badwords[i].len - 1;
+	    }
+	}
+
+      /* Skip C++/G++.  */
+      if (!strncasecmp (format_chars, "c++", 3)
+	  || !strncasecmp (format_chars, "g++", 3))
+	return format_chars + 2;
+    }
+
+  return wlen ? format_chars + wlen - 1 : NULL;
+}
+
+/* Check plain text in a format string of a GCC diagnostic function
+   for common quoting, punctuation, and spelling mistakes, and issue
+   -Wformat-diag warnings if they are found.   FORMAT_STRING_LOC is
+   the location of the format string, FORMAT_STRING_CST the format
+   string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
+   pointers to the beginning of the format string and the character
+   currently being processed, and BALTOKS describes paired "tokens"
+   within the format string that are expected to be balanced.
+   Returns a pointer to the last processed character.  */
+
+static const char*
+check_plain (location_t format_string_loc, tree format_string_cst,
+	     const char *orig_format_chars, const char *format_chars,
+	     baltoks_t &baltoks)
+{
+  /* For brevity.  */
+  const int opt = OPT_Wformat_diag;
+  /* Zero-based starting position of a problem sequence.  */
+  int fmtchrpos = format_chars - orig_format_chars;
+
+  if (*format_chars == '%')
+    {
+      /* Diagnose %<%s%> and suggest using %qs instead.  */
+      if (!strncmp (format_chars, "%<%s%>", 6))
+	format_warning_substr (format_string_loc, format_string_cst,
+			       fmtchrpos, fmtchrpos + 6, opt,
+			       "quoted %qs directive in format; "
+			       "use %qs instead", "%s", "%qs");
+      else if (format_chars - orig_format_chars > 2
+	       && !strncasecmp (format_chars - 3, "can%'t", 5)
+	       && !ISALPHA (format_chars[1]))
+	format_warning_substr (format_string_loc,
+			       format_string_cst,
+			       fmtchrpos - 3, fmtchrpos + 3, opt,
+			       "contraction %<%.*s%> in format; "
+			       "use %qs instead",
+			       6, format_chars - 3, "cannot");
+
+      return format_chars;
+    }
+
+  if (baltoks.quotdirs.length ())
+    {
+      /* Skip over all plain text within a quoting directive until
+	 the next directive.  */
+      while (*format_chars && '%' != *format_chars)
+	++format_chars;
+
+      return format_chars;
+    }
+
+  /* The length of the problem sequence.  */
+  int nchars = 0;
+
+  /* Diagnose any whitespace characters other than <space> but only
+     leading, trailing, and two or more consecutive <space>s.  Do
+     this before diagnosing control characters because whitespace
+     is a subset of controls.  */
+  const char *other_than_space = NULL;
+  while (ISSPACE (format_chars[nchars]))
+    {
+      if (format_chars[nchars] != ' ' && !other_than_space)
+	other_than_space = format_chars + nchars;
+      ++nchars;
+    }
+
+  if (nchars)
+    {
+      /* This is the most common problem: go the extra mile to decribe
+	 the problem in as much helpful detail as possible.  */
+      if (other_than_space)
+	{
+	  format_warning_substr (format_string_loc, format_string_cst,
+				 fmtchrpos, fmtchrpos + nchars, opt,
+				 "unquoted whitespace character %qc in format",
+				 *other_than_space);
+	  return format_chars + nchars - 1;
+	}
+
+      if (fmtchrpos == 0)
+	/* Accept strings of leading spaces with no warning.  */
+	return format_chars + nchars - 1;
+
+      if (!format_chars[nchars])
+	{
+	  format_warning_substr (format_string_loc, format_string_cst,
+				 fmtchrpos, fmtchrpos + nchars, opt,
+				 "spurious trailing space in format");
+	  return format_chars + nchars - 1;
+	}
+
+      if (nchars > 1)
+	{
+	  if (nchars == 2
+	      && orig_format_chars < format_chars
+	      && format_chars[-1] == '.'
+	      && format_chars[0] == ' '
+	      && format_chars[1] == ' ')
+	    {
+	      /* A period followed by two spaces.  */
+	      if (ISUPPER (*orig_format_chars))
+		{
+		  /* If the part before the period is a capitalized
+		     sentence check to make sure that what follows
+		     is also capitalized.  */
+		  if (ISLOWER (format_chars[2]))
+		    format_warning_substr (format_string_loc, format_string_cst,
+					   fmtchrpos, fmtchrpos + nchars, opt,
+					   "inconsistent capitalization in "
+					   "format");
+		}
+	    }
+	  else
+	    format_warning_substr (format_string_loc, format_string_cst,
+				   fmtchrpos, fmtchrpos + nchars, opt,
+				   "unquoted sequence of %i consecutive "
+				   "space characters in format", nchars);
+	  return format_chars + nchars - 1;
+	}
+
+      format_chars += nchars;
+      nchars = 0;
+    }
+
+  fmtchrpos = format_chars - orig_format_chars;
+
+  /* Diagnose any unquoted control characters other than the terminating
+     NUL.  */
+  while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
+    ++nchars;
+
+  if (nchars > 1)
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted control characters in format");
+      return format_chars + nchars - 1;
+    }
+  if (nchars)
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted control character %qc in format",
+			     *format_chars);
+      return format_chars + nchars - 1;
+    }
+
+  if (ISPUNCT (format_chars[0]))
+    {
+      size_t nelts = sizeof c_opers / sizeof *c_opers;
+      if (const char *ret = check_tokens (c_opers, nelts,
+					  format_string_loc, format_string_cst,
+					  orig_format_chars, format_chars,
+					  baltoks))
+	return ret;
+
+      nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
+      if (const char *ret = check_tokens (cxx_opers, nelts,
+					  format_string_loc, format_string_cst,
+					  orig_format_chars, format_chars,
+					  baltoks))
+	return ret;
+    }
+
+  if (ISALPHA (format_chars[0]))
+    {
+      size_t nelts = sizeof c_keywords / sizeof *c_keywords;
+      if (const char *ret = check_tokens (c_keywords, nelts,
+					  format_string_loc, format_string_cst,
+					  orig_format_chars, format_chars,
+					  baltoks))
+	return ret;
+
+      nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
+      if (const char *ret = check_tokens (cxx_keywords, nelts,
+					  format_string_loc, format_string_cst,
+					  orig_format_chars, format_chars,
+					  baltoks))
+	return ret;
+    }
+
+  nchars = 0;
+
+  /* Diagnose unquoted options.  */
+  if  ((format_chars == orig_format_chars
+	|| format_chars[-1] == ' ')
+       && format_chars[0] == '-'
+       && ((format_chars[1] == '-'
+	    && ISALPHA (format_chars[2]))
+	   || ISALPHA (format_chars[1])))
+    {
+      nchars = 1;
+      while (ISALNUM (format_chars[nchars])
+	     || '_' == format_chars[nchars]
+	     || '-' == format_chars[nchars]
+	     || '+' == format_chars[nchars])
+	++nchars;
+
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted option name %<%.*s%> in format",
+			     nchars, format_chars);
+      return format_chars + nchars - 1;
+    }
+
+  /* Diagnose leading, trailing, and two or more consecutive punctuation
+     characters.  */
+  const char *unbalanced = NULL;
+  while ('%' != format_chars[nchars]
+	 && ISPUNCT (format_chars[nchars])
+	 && !unbalanced)
+    {
+      switch (format_chars[nchars])
+	{
+	case '[':
+	  baltoks.brackets.safe_push (format_chars + nchars);
+	  break;
+	case '{':
+	  baltoks.curly.safe_push (format_chars + nchars);
+	  break;
+	case '(':
+	  baltoks.parens.safe_push (format_chars + nchars);
+	  break;
+	case '<':
+	  baltoks.pointy.safe_push (format_chars + nchars);
+	  break;
+
+	case ']':
+	  if (baltoks.brackets.length () > 0)
+	    baltoks.brackets.pop ();
+	  else
+	    unbalanced = format_chars + nchars;
+	  break;
+	case '}':
+	  if (baltoks.curly.length () > 0)
+	    baltoks.curly.pop ();
+	  else
+	    unbalanced = format_chars + nchars;
+	  break;
+	case ')':
+	  if (baltoks.parens.length () > 0)
+	    baltoks.parens.pop ();
+	  else
+	    unbalanced = format_chars + nchars;
+	  break;
+	case '>':
+	  if (baltoks.pointy.length () > 0)
+	    baltoks.pointy.pop ();
+	  else
+	    unbalanced = format_chars + nchars;
+	  break;
+	}
+
+      ++nchars;
+    }
+
+  if (unbalanced)
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unbalanced punctuation character %qc in format",
+			     *unbalanced);
+      return format_chars + nchars - 1;
+    }
+
+  if (nchars)
+    {
+      /* Consider any identifier that follows the pound ('#') sign
+	 a preprocessing directive.  */
+      if (nchars == 1
+	  && format_chars[0] == '#'
+	  && ISALPHA (format_chars[1]))
+	{
+	  while (ISALNUM (format_chars[nchars])
+		 || format_chars[nchars] == '_')
+	    ++nchars;
+
+	  format_warning_substr (format_string_loc, format_string_cst,
+				 fmtchrpos, fmtchrpos + nchars, opt,
+				 "unquoted preprocessing directive %<%.*s%> "
+				 "in format", nchars, format_chars);
+	  return format_chars + nchars - 1;
+	}
+
+      /* Diagnose a bare single quote.  */
+      if (nchars == 1
+	  && format_chars[0] == '\''
+	  && format_chars - orig_format_chars
+	  && ISALPHA (format_chars[-1])
+	  && ISALPHA (format_chars[1]))
+	{
+	  /* Diagnose a subset of contractions that are best avoided. */
+	  for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
+	    {
+	      const char *apos = strchr (contrs[i].name, '\'');
+	      gcc_assert (apos != NULL);
+	      int off = apos - contrs[i].name;
+
+	      if (format_chars - orig_format_chars >= off
+		  && !strncmp (format_chars - off,
+			       contrs[i].name, contrs[i].len))
+		{
+		  format_warning_substr (format_string_loc,
+					 format_string_cst,
+					 fmtchrpos, fmtchrpos + nchars, opt,
+					 "contraction %<%.*s%> in format; "
+					 "use %qs instead",
+					 contrs[i].len, contrs[i].name,
+					 contrs[i].alt);
+		  return format_chars + nchars - 1;
+		}
+	    }
+
+	  if (format_warning_substr (format_string_loc, format_string_cst,
+				     fmtchrpos, fmtchrpos + nchars, opt,
+				     "bare apostrophe %<'%> in format"))
+	    inform (format_string_loc,
+		    "if avoiding the apostrophe is not feasible, enclose "
+		    "it in a pair of %qs and %qs directives instead",
+		    "%<", "%>");
+	  return format_chars + nchars - 1;
+	}
+
+      /* Diagnose a backtick (grave accent).  */
+      if (nchars == 1
+	  && format_chars[0] == '`')
+	{
+	  if (format_warning_substr (format_string_loc, format_string_cst,
+				     fmtchrpos, fmtchrpos + nchars, opt,
+				     "grave accent %<`%> in format"))
+	    inform (format_string_loc,
+		    "use the apostrophe directive %qs instead", "%'");
+	  return format_chars + nchars - 1;
+	}
+
+      /* Diagnose a punctuation character after a space.  */
+      if (nchars == 1
+	  && format_chars - orig_format_chars
+	  && format_chars[-1] == ' '
+	  && strspn (format_chars, "!?:;.,") == 1)
+	{
+	  format_warning_substr (format_string_loc, format_string_cst,
+				 fmtchrpos - 1, fmtchrpos, opt,
+				 "space followed by punctuation character "
+				 "%<%c%>", format_chars[0]);
+	  return format_chars;
+	}
+
+      if (nchars == 1)
+	{
+	  if (!strncmp (format_chars, "\"%s\"", 4))
+	    {
+	      if (format_warning_substr (format_string_loc, format_string_cst,
+					 fmtchrpos, fmtchrpos + 4, opt,
+					 "quoted %qs directive in format",
+					 "%s"))
+		inform (format_string_loc, "if using %qs is not feasible, "
+			"use %qs instead", "%qs", "\"%-s\"");
+	    }
+
+	  if (format_chars[0] == '"')
+	    {
+	      baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
+	      return format_chars + nchars - 1;
+	    }
+	  if (format_chars[0] == '\'')
+	    {
+	      baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
+	      return format_chars + nchars - 1;
+	    }
+	}
+
+      if (fmtchrpos == 0)
+	{
+	  if (nchars == 1
+	      && format_chars[0] == '(')
+	    ;   /* Text beginning in an open parenthesis.  */
+	  else if (nchars == 3
+	      && !strncmp (format_chars, "...", 3)
+	      && format_chars[3])
+	    ;   /* Text beginning in an ellipsis.  */
+	  else
+	    {
+	      format_warning_substr (format_string_loc, format_string_cst,
+				     fmtchrpos, fmtchrpos + nchars, opt,
+				     "spurious leading punctuation sequence "
+				     "%<%.*s%> in format",
+				     nchars, format_chars);
+	      return format_chars + nchars - 1;
+	    }
+	}
+      else if (!format_chars[nchars])
+	{
+	  if (nchars == 1
+	      && (format_chars[nchars - 1] == ':'
+		  || format_chars[nchars - 1] == ')'))
+	    ;   /* Text ending in a colon or a closing parenthesis.  */
+	  else if (nchars == 1
+		   && ((ISUPPER (*orig_format_chars)
+			&& format_chars[nchars - 1] == '.')
+		       || strspn (format_chars + nchars - 1, "?])") == 1))
+		  ;   /* Capitalized sentence terminated by a single period,
+			 or text ending in a question mark, closing bracket,
+			 or parenthesis.  */
+	  else if (nchars == 2
+		   && format_chars[0] == '?'
+		   && format_chars[1] == ')')
+	    ;   /* A question mark after a closing parenthetical note.  */
+	  else if (nchars == 2
+		   && format_chars[0] == ')'
+		   && (format_chars[1] == '?'
+		       || format_chars[1] == ';'
+		       || format_chars[1] == ':'
+		       || (ISUPPER (*orig_format_chars)
+			   && format_chars[1] == '.')))
+	    ;   /* Closing parenthetical note followed by a question mark,
+		   semicolon, or colon at the end of the string, or by
+		   a period at the end of a capitalized sentence.  */
+	  else if (nchars == 3
+		   && format_chars - orig_format_chars > 0
+		   && !strncmp (format_chars, "...", 3))
+	    ;   /* Text ending in the ellipsis.  */
+	  else
+	    format_warning_substr (format_string_loc, format_string_cst,
+				   fmtchrpos, fmtchrpos + nchars, opt,
+				   "spurious trailing punctuation sequence "
+				   "%<%.*s%> in format",
+				   nchars, format_chars);
+
+	  return format_chars + nchars - 1;
+	}
+      else if (nchars == 2
+	       && format_chars[0] == ')'
+	       && (format_chars[1] == ':'
+		   || format_chars[1] == ';'
+		   || format_chars[1] == ',')
+	       && format_chars[2] == ' ')
+	;   /* Closing parenthetical note followed by a colon, semicolon
+	       or a comma followed by a space in the middle of the string.  */
+      else if (nchars > 1)
+	format_warning_substr (format_string_loc, format_string_cst,
+			       fmtchrpos, fmtchrpos + nchars, opt,
+			       "unquoted sequence of %i consecutive "
+			       "punctuation characters %q.*s in format",
+			       nchars, nchars, format_chars);
+      return format_chars + nchars - 1;
+    }
+
+  nchars = 0;
+
+  /* Finally, diagnose any unquoted non-graph, non-punctuation characters
+     other than the terminating NUL.  */
+  while (format_chars[nchars]
+	 && '%' != format_chars[nchars]
+	 && !ISPUNCT (format_chars[nchars])
+	 && !ISGRAPH (format_chars[nchars]))
+    ++nchars;
+
+  if (nchars > 1)
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted non-graph characters in format");
+      return format_chars + nchars - 1;
+    }
+  if (nchars)
+    {
+      format_warning_substr (format_string_loc, format_string_cst,
+			     fmtchrpos, fmtchrpos + nchars, opt,
+			     "unquoted non-graph character %qc in format",
+			     *format_chars);
+      return format_chars + nchars - 1;
+    }
+
+  return format_chars;
+}
+
+static void
+maybe_diag_unbalanced_tokens (location_t format_string_loc,
+			      const char *orig_format_chars,
+			      tree format_string_cst,
+			      baltoks_t &baltoks)
+{
+  const char *unbalanced = NULL;
+  /* Warn for the tokens left in BALTOKS once the whole format is parsed.  */
+  if (baltoks.brackets.length ())
+    unbalanced = baltoks.brackets.pop ();
+  else if (baltoks.curly.length ())
+    unbalanced = baltoks.curly.pop ();
+  else if (baltoks.parens.length ())
+    unbalanced = baltoks.parens.pop ();
+  else if (baltoks.pointy.length ())
+    unbalanced = baltoks.pointy.pop ();
+  /* At most one token, from the first non-empty vector above, is reported.  */
+  if (unbalanced)
+    format_warning_at_char (format_string_loc, format_string_cst,
+			    unbalanced - orig_format_chars + 1,
+			    OPT_Wformat_diag,
+			    "unbalanced punctuation character %<%c%> in format",
+			    *unbalanced);
+  /* Unlike the other two warnings, this one is under OPT_Wformat_.  */
+  if (baltoks.quotdirs.length ())
+    format_warning_at_char (format_string_loc, format_string_cst,
+			    baltoks.quotdirs.pop () - orig_format_chars,
+			    OPT_Wformat_,
+			    "unterminated quoting directive");
+  /* A recorded single quote is reported in preference to a double quote.  */
+  const char *quote
+    = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote;
+
+  if (quote)
+    format_warning_at_char (format_string_loc, format_string_cst,
+  			    quote - orig_format_chars + 1,
+			    OPT_Wformat_diag,
+  			    "unterminated quote character %<%c%> in format",
+  			    *quote);
+}
+
 /* Do the main part of checking a call to a format function.  FORMAT_CHARS
    is the NUL-terminated format string (which at this point may contain
    internal NUL characters); FORMAT_LENGTH is its length (excluding the
@@ -2816,8 +3775,10 @@ check_format_info_main (format_check_results *res,
      and it didn't use $; 1 if $ formats are in use.  */
   int has_operand_number = -1;
 
-  /* Vector of pointers to opening quoting directives (like GCC "%<").  */
-  auto_vec<const char*> quotdirs;
+  /* Vectors of pointers to opening quoting directives (like GCC "%<"),
+     opening braces, brackets, and parentheses.  Used to detect unbalanced
+     tokens.  */
+  baltoks_t baltoks;
 
   /* Pointers to the most recent color directives (like GCC's "%r or %R").
      A starting color directive much be terminated before the end of
@@ -2828,10 +3789,26 @@ check_format_info_main (format_check_results *res,
 
   init_dollar_format_checking (info->first_arg_num, first_fillin_param);
 
+  /* In GCC diagnostic functions check plain directives (substrings within
+     the format string that don't start with %) for quoting and punctuations
+     problems.  */
+  bool ck_plain = (!info->is_raw
+		   && (info->format_type == gcc_diag_format_type
+		       || info->format_type == gcc_tdiag_format_type
+		       || info->format_type == gcc_cdiag_format_type
+		       || info->format_type == gcc_cxxdiag_format_type));
+
   while (*format_chars != 0)
     {
-      if (*format_chars++ != '%')
+      if (ck_plain)
+	format_chars = check_plain (format_string_loc,
+				    format_string_cst,
+				    orig_format_chars, format_chars,
+				    baltoks);
+
+      if (*format_chars == 0 || *format_chars++ != '%')
 	continue;
+
       if (*format_chars == 0)
 	{
 	  format_warning_at_char (format_string_loc, format_string_cst,
@@ -2846,6 +3823,8 @@ check_format_info_main (format_check_results *res,
 	  continue;
 	}
 
+      /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
+	 to ARG_PARSER members may modify the variable.  */
       flag_chars_t flag_chars;
       argument_parser arg_parser (info, format_chars, format_string_cst,
 				  orig_format_chars, format_string_loc,
@@ -2908,7 +3887,7 @@ check_format_info_main (format_check_results *res,
       flag_chars.validate (fki, fci, flag_specs, format_chars,
 			   format_string_cst,
 			   format_string_loc, orig_format_chars, format_char,
-			   quotdirs.length () > 0);
+			   baltoks.quotdirs.length () > 0);
 
       const int alloc_flag = flag_chars.get_alloc_flag (fki);
       const bool suppressed = flag_chars.assignment_suppression_p (fki);
@@ -2920,17 +3899,17 @@ check_format_info_main (format_check_results *res,
 
       if (quot_begin_p && !quot_end_p)
 	{
-	  if (quotdirs.length ())
+	  if (baltoks.quotdirs.length ())
 	    format_warning_at_char (format_string_loc, format_string_cst,
 				    format_chars - orig_format_chars,
 				    OPT_Wformat_,
 				    "nested quoting directive");
-	  quotdirs.safe_push (format_chars);
+	  baltoks.quotdirs.safe_push (format_chars);
 	}
       else if (!quot_begin_p && quot_end_p)
 	{
-	  if (quotdirs.length ())
-	    quotdirs.pop ();
+	  if (baltoks.quotdirs.length ())
+	    baltoks.quotdirs.pop ();
 	  else
 	    format_warning_at_char (format_string_loc, format_string_cst,
 				    format_chars - orig_format_chars,
@@ -2962,7 +3941,7 @@ check_format_info_main (format_check_results *res,
 
       /* Diagnose directives that shouldn't appear in a quoted sequence.
 	 (They are denoted by a double quote in FLAGS2.)  */
-      if (quotdirs.length ())
+      if (baltoks.quotdirs.length ())
 	{
 	  if (strchr (fci->flags2, '"'))
 	    format_warning_at_char (format_string_loc, format_string_cst,
@@ -3018,10 +3997,9 @@ check_format_info_main (format_check_results *res,
   if (has_operand_number > 0)
     finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);
 
-  if (quotdirs.length ())
-    format_warning_at_char (format_string_loc, format_string_cst,
-			    quotdirs.pop () - orig_format_chars,
-			    OPT_Wformat_, "unterminated quoting directive");
+  maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
+				format_string_cst, baltoks);
+
   if (color_begin && !color_end)
     format_warning_at_char (format_string_loc, format_string_cst,
 			    color_begin - orig_format_chars,
@@ -4199,7 +5177,7 @@ handle_format_attribute (tree *node, tree atname, tree args,
 	  if (arg_num != info.first_arg_num)
 	    {
 	      if (!(flags & (int) ATTR_FLAG_BUILT_IN))
-		error ("args to be formatted is not %<...%>");
+		error ("argument to be formatted is not %<...%>");
 	      *no_add_attrs = true;
 	      return NULL_TREE;
 	    }
diff --git a/gcc/testsuite/gcc.dg/format/gcc_diag-11.c b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c
new file mode 100644
index 00000000000..a976c7aa519
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/format/gcc_diag-11.c
@@ -0,0 +1,455 @@
+/* Test warnings for common punctuation, quoting, and spelling issues
+   in GCC diagnostics.
+   { dg-do compile }
+   { dg-options "-Wformat -Wformat-diag" } */
+
+/* Magic identifiers must be set before the attribute is used.  */
+
+typedef long long __gcc_host_wide_int__;
+
+typedef struct location_s
+{
+  const char *file;
+  int line;
+} location_t;
+
+union tree_node;
+typedef union tree_node *tree;
+
+/* Define gimple as a dummy type.  The typedef must be provided for
+   the C test to find the symbol.  */
+typedef struct gimple gimple;
+
+/* Likewise for cgraph_node.  */
+typedef struct cgraph_node cgraph_node;
+
+#define ATTR(...)    __attribute__ ((__VA_ARGS__))
+#define FORMAT(kind) ATTR (format (__gcc_## kind ##__, 1, 2))
+
+/* Raw formatting function like pp_format.  */
+void diag_raw (const char*, ...) ATTR (format (__gcc_diag_raw__, 1, 2));
+void cdiag_raw (const char*, ...) ATTR (format (__gcc_cdiag_raw__, 1, 2));
+void tdiag_raw (const char*, ...) ATTR (format (gcc_tdiag_raw, 1, 2));
+void cxxdiag_raw (const char*, ...) ATTR (format (gcc_cxxdiag_raw, 1, 2));
+
+/* Basic formatting function.  */
+void diag (const char*, ...) FORMAT (diag);
+
+/* Diagnostic formatting function like error or warning declared
+   by the C front end.  */
+void cdiag (const char*, ...) FORMAT (cdiag);
+
+/* Diagnostic formatting function like error or warning declared
+   by the middle-end or back-end.  */
+void tdiag (const char*, ...) FORMAT (tdiag);
+
+/* Diagnostic formatting function like error or warning declared
+   by the C++ front-end.  */
+void cxxdiag (const char*, ...) FORMAT (cxxdiag);
+
+
+/* Verify that functions declared with one of the "raw" diag attributes
+   (see diag_raw through cxxdiag_raw) are not subject to -Wformat-diag.  */
+
+void test_diag_raw (tree t, gimple *gc)
+{
+  diag_raw ("a  b");
+  diag_raw ("newline\n");
+  diag_raw ("lone period.");
+  diag_raw ("multiple punctuators: !!!");
+  diag_raw ("unbalanced paren (");
+  diag_raw ("keyword alignas and identifier_with_underscores");
+  diag_raw ("disable __builtin_abs with the -fno-builtin-abs option");
+  diag_raw ("who says I can't have no stinkin' contractions? ");
+
+  cdiag_raw ("__atomic_sync (%qE) == 7???", t);
+  tdiag_raw ("__builtin_abs (%E) < 0!?!", t);
+  cxxdiag_raw ("template <> int f (%E", t);
+}
+
+/* Verify that functions declared with the C front-end __gcc_cdiag__
+   attribute detect invalid whitespace in format strings.  */
+
+void test_cdiag_whitespace (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  /* Verify that strings of leading spaces don't trigger a warning.  */
+  cdiag (" a");
+  cdiag ("  b");
+  cdiag ("   c");
+  cdiag ("%< %>a");
+  cdiag ("%<  %>a");
+  cdiag ("a b");
+  cdiag ("a  b");           /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */
+  cdiag ("a ");             /* { dg-warning "spurious trailing space" } */
+  cdiag ("a  ");            /* { dg-warning "spurious trailing space" } */
+  cdiag ("a%< %>");
+  cdiag ("a%< %>%< %>");
+  cdiag ("a%< %> ");        /* { dg-warning "spurious trailing space" } */
+  cdiag ("a%< %>  %< %>");  /* { dg-warning "unquoted sequence of 2 consecutive space characters" } */
+
+  /* It's debatable whether the following two format strings should
+     be diagnosed.  They aren't only because it's simpler that way.  */
+  cdiag ("a %< %>");
+  cdiag ("a%< %> %< %>");
+
+  /* Exercise other whitespace characters.  */
+  cdiag ("a\fb");           /* { dg-warning "unquoted whitespace character '\\\\x0c'" } */
+  cdiag ("a\nb");           /* { dg-warning "unquoted whitespace character '\\\\x0a'" } */
+  cdiag ("a\rb");           /* { dg-warning "unquoted whitespace character '\\\\x0d'" } */
+  cdiag ("a\vb");           /* { dg-warning "unquoted whitespace character '\\\\x0b'" } */
+
+  cdiag ("First sentence.  And a next.");
+  cdiag ("First sentence.  not capitalized sentence"); /* { dg-warning "inconsistent capitalization" } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+  /* Verify that the warning can be suppressed.  */
+  cdiag ("\ta\b    c\vb\n");
+
+#pragma GCC diagnostic pop
+}
+
+/* Verify that unquoted control characters are diagnosed.  */
+void test_cdiag_control (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("\1");             /* { dg-warning "unquoted control character '\\\\x01'" } */
+  cdiag ("a\ab");           /* { dg-warning "unquoted control character '\\\\x07'" } */
+  cdiag ("a\bb");           /* { dg-warning "unquoted control character '\\\\x08'" } */
+}
+
+/* Exercise punctuation in format strings passed to cdiag.  */
+void test_cdiag_punct (tree t, gimple *gc, int i)
+{
+  (void)&t; (void)&gc;
+
+  /* Exercise the period.  */
+  cdiag (".abc");           /* { dg-warning "spurious leading punctuation sequence .\.." } */
+  cdiag ("abc;");           /* { dg-warning "spurious trailing punctuation sequence .;." } */
+  /* Verify that sentences that start with an uppercase letter and end
+     in a period are not diagnosed.  */
+  cdiag ("This is a full sentence.");
+  cdiag ("Capitalized sentence (with a parethetical note).");
+  cdiag ("Not a full sentence;");   /* { dg-warning "spurious trailing punctuation sequence .;." } */
+  cdiag ("Neither is this one,");   /* { dg-warning "spurious trailing punctuation sequence .,." } */
+
+  /* Exercise the ellipsis.  */
+  cdiag ("this message...");
+  cdiag ("...continues here");
+  cdiag ("but...not here"); /* { dg-warning "unquoted sequence of 3 consecutive punctuation characters" } */
+
+  /* Verify that parenthesized sentences are accepted, even the whole
+     message (done in the C++ front end).  */
+  cdiag ("null argument where non-null required (argument %i)", i);
+  cdiag ("null (argument %i) where non-null required", i);
+  cdiag ("(see what comes next)");
+
+  /* Verify that only a single trailing colon is accepted.  */
+  cdiag ("candidates are:");
+  cdiag ("candidates are::"); /* { dg-warning "spurious trailing punctuation sequence .::." } */
+
+  /* Exercise C++.  */
+  cdiag ("C++ is cool");
+  cdiag ("this is c++");
+  cdiag ("you can do this in C++ but not in C");
+
+  /* Also verify that G++ is accepted.  */
+  cdiag ("G++ rocks");
+  cdiag ("this is accepted by g++");
+  cdiag ("valid in G++ (or g++) but not in gcc");
+
+  /* Exercise parenthetical note followed by a colon, semicolon,
+     or a comma.  */
+  cdiag ("found a bug (here):");
+  cdiag ("because of another bug (over there); fix it");
+
+  cdiag ("found foo (123): go look at it");
+  cdiag ("missed bar (abc); will try harder next time");
+
+  cdiag ("expected this (or that), got something else (or who knows what)");
+
+  /* Exercise parenthetical note with a question mark.  */
+  cdiag ("hmmm (did you really mean that?)");
+  cdiag ("error (did you mean %<foo()%>?)");
+  /* And a question mark after a parenthetical note.  */
+  cdiag ("did you mean this (or that)?");
+
+  /* But make sure unbalanced parentheses are diagnosed.  */
+  cdiag ("or this or the other)?");   /* { dg-warning "unbalanced punctuation character '\\\)'" } */
+
+  cdiag ("## Heading");               /* { dg-warning "spurious leading punctuation sequence .##." } */
+  cdiag ("## %s ##", "1");            /* { dg-warning "spurious (leading|trailing) punctuation sequence .##." } */
+
+  cdiag ("#1 priority");              /* { dg-warning "spurious leading punctuation sequence .#." } */
+  cdiag ("priority #2");
+
+  /* Quoting.  */
+  cdiag ("\"quoted\"");
+  cdiag ("\"quoted\" string");
+  cdiag ("this is a \"string in quotes\"");
+  cdiag ("\"missing closing quote");  /* { dg-warning "unterminated quote character '\"'" } */
+
+  /* PR translation/90121 - punctuation character after a space.  */
+  cdiag ("bad version : 1");          /* { dg-warning "space followed by punctuation character ':'" } */
+  cdiag ("problem ; fix it");         /* { dg-warning "space followed by punctuation character ';'" } */
+  cdiag ("End . not.");               /* { dg-warning "space followed by punctuation character '.'" } */
+  cdiag ("it is bad , very bad");     /* { dg-warning "space followed by punctuation character ','" } */
+  cdiag ("say what ?");               /* { dg-warning "space followed by punctuation character '?'" } */
+
+  /* But these are okay after a space.  But should they be?  */
+  cdiag ("1 / 2");
+  cdiag ("2 + 3");
+  cdiag ("2 - 3");
+}
+
+void test_cdiag_punct_balance (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  /* A lone less-than or greater-than sign is an unbalanced token.  */
+  cdiag ("a < b");          /* { dg-warning "unbalanced punctuation character '<' in format" } */
+  cdiag ("must be > 0");    /* { dg-warning "unbalanced punctuation character '>' in format" } */
+
+  cdiag ("f()");            /* { dg-warning "spurious trailing punctuation sequence .\\\(\\\)." } */
+  cdiag ("g(1)");
+  cdiag ("(");              /* { dg-warning "spurious leading punctuation character|unbalanced" } */
+  cdiag ("()");             /* { dg-warning "spurious leading punctuation sequence" } */
+  cdiag (")");              /* { dg-warning "unbalanced punctuation character '\\\)'" } */
+  cdiag ("f()g");           /* { dg-warning "unquoted sequence of 2 consecutive punctuation characters" } */
+  cdiag ("illegal operand (1)");
+}
+
+/* Verify that unquoted non-graph characters are diagnosed.  */
+void test_cdiag_nongraph (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("a\376b");         /* { dg-warning "unquoted non-graph character '\\\\xfe'" } */
+  cdiag ("a\377b");         /* { dg-warning "unquoted non-graph character '\\\\xff'" } */
+}
+
+/* Verify that unquoted uses of __attribute and __attribute__ are diagnosed.  */
+void test_cdiag_attribute (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("attribute foo");
+  cdiag ("this is attribute bar");
+  cdiag ("bad __attribute bar");        /* { dg-warning "unquoted attribute" } */
+  cdiag ("__attribute__ (foobar) bad"); /* { dg-warning "unquoted attribute" } */
+  cdiag ("__attribute__ ((foobar))");   /* { dg-warning "unquoted attribute" } */
+  cdiag ("__attribute__ (xxx))");       /* { dg-warning "unquoted attribute" } */
+  /* { dg-warning "unbalanced punctuation character '\\\)'" "xxx" { target *-*-* } .-1 } */
+  cdiag ("__attribute__ ((yyy)))");     /* { dg-warning "unquoted attribute" } */
+  /* { dg-warning "unbalanced punctuation character '\\\)'" "yyy" { target *-*-* } .-1 } */
+  cdiag ("__attribute__ ((zzz)");       /* { dg-warning "unquoted attribute" } */
+  /* { dg-warning "unbalanced punctuation character '\\\('" "zzz" { target *-*-* } .-1 } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+  /* Verify that the warning can be suppressed.  */
+  cdiag ("__attribute__ (((");
+
+#pragma GCC diagnostic pop
+}
+
+void test_cdiag_builtin (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+  /* Unquoted names with the __builtin_ prefix are diagnosed.  */
+  cdiag ("__builtin_abort");    /* { dg-warning "unquoted name of built-in function '__builtin_abort'" } */
+  cdiag ("in __builtin_trap");  /* { dg-warning "unquoted name of built-in function '__builtin_trap'" } */
+  cdiag ("__builtin_xyz bites");/* { dg-warning "unquoted name of built-in function '__builtin_xyz'" } */
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-diag"
+
+  /* Verify that the warning can be suppressed.  */
+  cdiag ("__builtin____with____lots__of__underscores");
+
+#pragma GCC diagnostic pop
+}
+
+/* Verify that unquoted command-line option names are diagnosed.  */
+void test_cdiag_option (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("%<-Wall%>");
+  cdiag ("use option %<-Wextra%> to enable additinal warnings");
+
+  cdiag ("-O2 is fast");        /* { dg-warning "unquoted option name '-O2'" } */
+  cdiag ("but -O3 is faster");  /* { dg-warning "unquoted option name '-O3'" } */
+
+  cdiag ("get --help");         /* { dg-warning "unquoted option name '--help'" } */
+  cdiag ("enable -m32");        /* { dg-warning "unquoted option name '-m32'" } */
+  cdiag ("value is -12");
+  cdiag ("foo-O2");
+  cdiag ("a-W");
+}
+
+/* Verify that unquoted keywords and preprocessor directives are diagnosed.  */
+void test_cdiag_keyword (tree t, gimple *gc)
+{
+  cdiag ("alignasi");
+  cdiag ("malignofer or alignofus");
+  cdiag ("use alignof");        /* { dg-warning "unquoted keyword 'alignof'" } */
+  cdiag ("or _Alignof");        /* { dg-warning " keyword '_Alignof'" } */
+  cdiag ("_Pragma too");        /* { dg-warning " keyword '_Pragma'" } */
+
+  cdiag ("a #error directive"); /* { dg-warning "unquoted preprocessing directive '#error'" } */
+  cdiag ("#include file");      /* { dg-warning "unquoted preprocessing directive '#include'" } */
+  cdiag ("but #pragma foobar"); /* { dg-warning "unquoted preprocessing directive '#pragma'" } */
+  cdiag ("pragma foobar is okay");
+  cdiag ("or even # pragma is fine");
+
+  /* Exercise qualifiers.  */
+  cdiag ("const function");
+  cdiag ("const-qualified variable"); /* { dg-warning "unquoted keyword 'const-qualified'" } */
+  /* { dg-message "use '%<const%>-qualified' instead" "const-qualified" { target *-*-* } .-1 } */
+  cdiag ("a const %qD", t);     /* { dg-warning "unquoted keyword 'const'" } */
+  cdiag ("restrict %qE", t);    /* { dg-warning "unquoted keyword 'restrict'" } */
+  cdiag ("volatile %qT", t);    /* { dg-warning "unquoted keyword 'volatile'" } */
+  cdiag ("const %qD and restrict %qE or volatile %qT", t, t, t);
+  /* { dg-warning "unquoted keyword 'const'" "" { target *-*-* } .-1 } */
+  /* { dg-warning "unquoted keyword 'restrict'" "" { target *-*-* } .-2 } */
+  /* { dg-warning "unquoted keyword 'volatile'" "" { target *-*-* } .-3 } */
+
+  cdiag ("an offsetof here");   /* { dg-warning "unquoted keyword 'offsetof" } */
+  cdiag ("sizeof x");           /* { dg-warning "unquoted keyword 'sizeof" } */
+  cdiag ("have typeof");        /* { dg-warning "unquoted keyword 'typeof" } */
+
+  /* Words not used as keywords and so not expected to be quoted.  */
+  cdiag ("break rules");
+  cdiag ("if we continue by default for a short while else do nothing");
+  cdiag ("register a function for unsigned extern to void const reads");
+  cdiag ("or volatile access");
+}
+
+/* Verify that unquoted operators are diagnosed.  */
+void test_cdiag_operator (tree t, gimple *gc)
+{
+  cdiag ("x != 0");             /* { dg-warning "unquoted operator '!='" } */
+  cdiag ("logical &&");         /* { dg-warning "unquoted operator '&&" } */
+  cdiag ("+= operator");        /* { dg-warning "unquoted operator '\\\+=" } */
+  cdiag ("a == b");             /* { dg-warning "unquoted operator '=='" } */
+  cdiag ("++a");                /* { dg-warning "unquoted operator '\\\+\\\+'" } */
+  cdiag ("b--");                /* { dg-warning "unquoted operator '--'" } */
+  cdiag ("1 << 2");             /* { dg-warning "unquoted operator '<<'" } */
+  cdiag (">> here <<");         /* { dg-warning "unquoted operator '>>|<<'" } */
+}
+
+/* Verify that unquoted type names and type keywords are diagnosed.  */
+void test_cdiag_type_name (tree t, gimple *gc)
+{
+  cdiag ("the word character should not be quoted");
+  cdiag ("but char should be"); /* { dg-warning "unquoted keyword 'char'" } */
+
+  cdiag ("unsigned char should be quoted");     /* { dg-warning "unquoted type name 'unsigned char'" } */
+  cdiag ("but unsigned character is fine");
+
+  cdiag ("as should int");      /* { dg-warning "unquoted keyword 'int'" } */
+  cdiag ("and signed int");     /* { dg-warning "unquoted type name 'signed int'" } */
+  cdiag ("and also unsigned int");     /* { dg-warning "unquoted type name 'unsigned int'" } */
+  cdiag ("very long thing");
+  cdiag ("use long long here"); /* { dg-warning "unquoted type name 'long long'" } */
+
+  cdiag ("have a floating type");
+  cdiag ("found float type");   /* { dg-warning "unquoted keyword 'float'" } */
+
+  cdiag ("wchar_t is wide");    /* { dg-warning "unquoted identifier or keyword 'wchar_t'" } */
+}
+
+/* Verify that unquoted identifiers with underscores are diagnosed.  */
+void test_cdiag_identifier (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("private _x ident");   /* { dg-warning "unquoted identifier or keyword '_x'" } */
+  cdiag ("and another __y");    /* { dg-warning "unquoted identifier or keyword '__y'" } */
+  cdiag ("ident z_ with trailing underscore");   /* { dg-warning "unquoted identifier or keyword 'z_'" } */
+  cdiag ("v_ variable");        /* { dg-warning "unquoted identifier or keyword 'v_'" } */
+  cdiag ("call foo_bar");       /* { dg-warning "unquoted identifier or keyword 'foo_bar'" } */
+  cdiag ("unqoted x_y ident");  /* { dg-warning "unquoted identifier or keyword 'x_y'" } */
+
+  cdiag ("size_t type");        /* { dg-warning "unquoted identifier or keyword 'size_t'" } */
+  cdiag ("bigger than INT_MAX");/* { dg-warning "unquoted identifier or keyword 'INT_MAX'" } */
+
+  cdiag ("quoted ident %<a_b%>");
+  cdiag ("another quoted identifier %<x_%> here");
+}
+
+/* Verify that contractions and misspelled or abbreviated terms are diagnosed.  */
+void test_cdiag_bad_words (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cdiag ("aren't you dumb?");  /* { dg-warning "bare apostrophe ''' in format" } */
+  cdiag ("bitfields suck");    /* { dg-warning "misspelled term 'bitfields' in format; use 'bit-fields' instead" } */
+  cdiag ("invalid bitfield");  /* { dg-warning "misspelled term 'bitfield' in format; use 'bit-field' instead" } */
+  cdiag ("bad builtin function");  /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+  cdiag ("bad builtin function");  /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+  cdiag ("builtin function x");    /* { dg-warning "misspelled term 'builtin function' in format; use 'built-in function' instead" } */
+  cdiag ("builtin functions disabled");    /* { dg-warning "misspelled term 'builtin functions' in format; use 'built-in functions' instead" } */
+  cdiag ("enable builtin functions");      /* { dg-warning "misspelled term 'builtin functions' in format; use 'built-in functions' instead" } */
+  cdiag ("you can't do that"); /* { dg-warning "contraction 'can't' in format" } */
+  cdiag ("you can%'t do that");/* { dg-warning "contraction 'can%'t' in format" } */
+  cdiag ("Can%'t touch this.");/* { dg-warning "contraction 'Can%'t' in format" } */
+  cdiag ("on the commandline");/* { dg-warning "misspelled term 'commandline' in format; use 'command line' instead" } */
+  cdiag ("command line option");/* { dg-warning "misspelled term 'command line option' in format; use 'command-line option' instead" } */
+  cdiag ("it mustn't be");     /* { dg-warning "contraction 'mustn't' in format" } */
+  cdiag ("isn't that silly?"); /* { dg-warning "bare apostrophe ''' in format" } */
+
+  cdiag ("can not do this");   /* { dg-warning "misspelled term 'can not' in format; use 'cannot' instead" } */
+  cdiag ("you can not");       /* { dg-warning "misspelled term 'can not' in format; use 'cannot' instead" } */
+
+  /* See PR target/90157 - aarch64: unnecessary abbreviation in diagnostic */
+  cdiag ("Mising arg.");       /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+  cdiag ("2 args: a and b");   /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+  cdiag ("arg 1");             /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+  cdiag ("Args are wrong.");   /* { dg-warning "misspelled term 'Args' in format; use 'arguments' instead" } */
+  cdiag ("bad arg");           /* { dg-warning "misspelled term 'arg' in format; use 'argument' instead" } */
+  cdiag ("two args");          /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+  cdiag ("args 1 and 2");      /* { dg-warning "misspelled term 'args' in format; use 'arguments' instead" } */
+
+  cdiag ("Reg A");             /* { dg-warning "misspelled term 'Reg' in format; use 'register' instead" } */
+  cdiag ("regs A and B");      /* { dg-warning "misspelled term 'regs' in format; use 'registers' instead" } */
+  cdiag ("no regs");           /* { dg-warning "misspelled term 'regs' in format; use 'registers' instead" } */
+
+  /* Verify words that end in "arg" and "args" or "reg" and "regs" are
+     not diagnosed.  */
+  cdiag ("gulmarg and balfarg");
+  cdiag ("ademargs or toshargs");
+  cdiag ("talk to Greg");
+  cdiag ("prepreg is a fabric");
+  cdiag ("there are dregs in my wine");
+}
+
+/* Verify the handling of quoted %s directives and of quoted paired tokens.  */
+void test_cdiag_directive (tree t, gimple *gc)
+{
+  (void)&t; (void)&gc;
+
+  cxxdiag ("%<%s%>", "");     /* { dg-warning "quoted '%s' directive in format" } */
+  /* This was asked to be diagnosed in PR #90158 but there, the \"%s\"
+     is in parentheses which ends up getting diagnosed because of
+     the two consecutive punctuation characters, ( and ".  */
+  cdiag ("\"%s\"", "");       /* { dg-warning "quoted '%s' directive in format" } */
+
+  /* Make sure quoted paired tokens are not diagnosed.  */
+  cdiag ("%<'%>");
+  cdiag ("%<\"%>");
+  cdiag ("%<<%>");
+  cdiag ("%<>%>");
+  cdiag ("%<(%>");
+  cdiag ("%<)%>");
+  cdiag ("%<[%>");
+  cdiag ("%<]%>");
+
+  cdiag ("%<'%> %<\"%> %<>%> %<<%> %<)%> %<(%> %<]%> %<[%>");
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]