PR c++/52538 Extend C++11 UDLs to be compatible with inttypes.h macros (issue6109043)

Ollie Wild aaw@google.com
Mon Apr 23 13:26:00 GMT 2012


On Mon, Apr 23, 2012 at 7:53 AM, Gabriel Dos Reis
<gdr@integrable-solutions.net> wrote:
> On Mon, Apr 23, 2012 at 7:30 AM, Ollie Wild <aaw@google.com> wrote:
>>
>> Do you still want me to shorten it?
>
> yes.

Done.  Updated patch attached.

Ollie
-------------- next part --------------
commit 3f53671fb7fc7811277f047e7914f78e127031a6
Author: Ollie Wild <aaw@google.com>
Date:   Sun Apr 22 21:37:08 2012 -0500

    Add new option, -Wliteral-suffix.
    
    This option, which is enabled by default, causes the preprocessor to warn
    when a string or character literal is followed by a ud-suffix which does
    not begin with an underscore.  According to [lex.ext]p10, this is
    ill-formed.
    
    Also modifies the preprocessor to treat such ill-formed suffixes as separate
    preprocessing tokens.  This is consistent with the Clang front end (see
    http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and enables
    backwards compatibility with code that uses formatting macros from
    <inttypes.h>, as in the following code block:
    
      int main() {
        int64_t i64 = 123;
        printf("My int64: %"PRId64"\n", i64);
      }
    
    Google ref b/6377711.
    
    2012-04-22   Ollie Wild  <aaw@google.com>
    
    	* gcc/c-family/c-common.c: Add CPP_W_LITERAL_SUFFIX mapping.
    	* gcc/c-family/c-opts.c (c_common_handle_option): Handle
    	OPT_Wliteral_suffix.
    	* gcc/c-family/c.opt: Add Wliteral-suffix.
    	* gcc/doc/invoke.texi (Wliteral-suffix): Document new option.
    	* gcc/testsuite/g++.dg/cpp0x/Wliteral-suffix.C: New test.
    	* libcpp/include/cpplib.h (struct cpp_options): Add new field,
    	warn_literal_suffix.
    	(CPP_W_LITERAL_SUFFIX): New enum.
    	* libcpp/init.c (cpp_create_reader): Default initialization of
    	warn_literal_suffix.
    	* libcpp/lex.c (lex_raw_string): Treat ud-suffixes which don't
    	begin with '_' as separate tokens and produce a warning.
    	(lex_string): Ditto.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 4eacd19..bf5b034 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8820,6 +8820,7 @@ static const struct reason_option_codes_t option_codes[] = {
   {CPP_W_NORMALIZE,			OPT_Wnormalized_},
   {CPP_W_INVALID_PCH,			OPT_Winvalid_pch},
   {CPP_W_WARNING_DIRECTIVE,		OPT_Wcpp},
+  {CPP_W_LITERAL_SUFFIX,		OPT_Wliteral_suffix},
   {CPP_W_NONE,				0}
 };
 
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 17e1958..2510747 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -476,6 +476,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
       cpp_opts->warn_invalid_pch = value;
       break;
 
+    case OPT_Wliteral_suffix:
+      cpp_opts->warn_literal_suffix = value;
+      break;
+
     case OPT_Wlong_long:
       cpp_opts->cpp_warn_long_long = value;
       break;
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index d8c944d..db8ca81 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -449,6 +449,10 @@ Wjump-misses-init
 C ObjC Var(warn_jump_misses_init) Init(-1) Warning
 Warn when a jump misses a variable initialization
 
+Wliteral-suffix
+C++ ObjC++ Warning
+Warn when a string or character literal is followed by a ud-suffix which does not begin with an underscore.
+
 Wlogical-op
 C ObjC C++ ObjC++ Var(warn_logical_op) Init(0) Warning 
 Warn when a logical operator is suspiciously always evaluating to true or false
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8ca2f4e..3c9588a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -198,8 +198,8 @@ in the following sections.
 -fno-default-inline  -fvisibility-inlines-hidden @gol
 -fvisibility-ms-compat @gol
 -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
--Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
--Wnon-virtual-dtor  -Wreorder @gol
+-Wdelete-non-virtual-dtor -Wliteral-suffix -Wnarrowing @gol
+-Wnoexcept -Wnon-virtual-dtor  -Wreorder @gol
 -Weffc++  -Wstrict-null-sentinel @gol
 -Wno-non-template-friend  -Wold-style-cast @gol
 -Woverloaded-virtual  -Wno-pmf-conversions @gol
@@ -2425,6 +2425,30 @@ an instance of a derived class through a pointer to a base class if the
 base class does not have a virtual destructor.  This warning is enabled
 by @option{-Wall}.
 
+@item -Wliteral-suffix @r{(C++ and Objective-C++ only)}
+@opindex Wliteral-suffix
+@opindex Wno-literal-suffix
+Warn when a string or character literal is followed by a ud-suffix which does
+not begin with an underscore.  As a conforming extension, GCC treats such
+suffixes as separate preprocessing tokens in order to maintain backwards
+compatibility with code that uses formatting macros from @code{<inttypes.h>}.
+For example:
+
+@smallexample
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <stdio.h>
+
+int main() @{
+  int64_t i64 = 123;
+  printf("My int64: %"PRId64"\n", i64);
+@}
+@end smallexample
+
+In this case, @code{PRId64} is treated as a separate preprocessing token.
+
+This warning is enabled by default.
+
 @item -Wnarrowing @r{(C++ and Objective-C++ only)}
 @opindex Wnarrowing
 @opindex Wno-narrowing
diff --git a/gcc/testsuite/g++.dg/cpp0x/Wliteral-suffix.C b/gcc/testsuite/g++.dg/cpp0x/Wliteral-suffix.C
new file mode 100644
index 0000000..39a8353
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/Wliteral-suffix.C
@@ -0,0 +1,29 @@
+// { dg-do run }
+// { dg-options "-std=c++0x" }
+
+// Make sure -Wliteral-suffix is enabled by default and
+// triggers as expected.
+
+#define BAR "bar"
+#define PLUS_ONE + 1
+
+#include <cstdint>
+#include <cassert>
+
+
+void
+test()
+{
+  char c = '3'PLUS_ONE;	  // { dg-warning "invalid suffix on literal" }
+  char s[] = "foo"BAR;	  // { dg-warning "invalid suffix on literal" }
+
+  assert(c == '4');
+  assert(s[3] != '\0');
+  assert(s[3] == 'b');
+}
+
+int
+main()
+{
+  test();
+}
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index bf59d01..9dbc477 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -427,6 +427,10 @@ struct cpp_options
   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
   unsigned char user_literals;
 
+  /* Nonzero means warn when a string or character literal is followed by a
+     ud-suffix which does not begin with an underscore.  */
+  unsigned char warn_literal_suffix;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
@@ -906,7 +910,8 @@ enum {
   CPP_W_CXX_OPERATOR_NAMES,
   CPP_W_NORMALIZE,
   CPP_W_INVALID_PCH,
-  CPP_W_WARNING_DIRECTIVE
+  CPP_W_WARNING_DIRECTIVE,
+  CPP_W_LITERAL_SUFFIX
 };
 
 /* Output a diagnostic of some kind.  */
diff --git a/libcpp/init.c b/libcpp/init.c
index 5fa82ca..3262184 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
   CPP_OPTION (pfile, warn_variadic_macros) = 1;
   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
   CPP_OPTION (pfile, warn_normalize) = normalized_C;
+  CPP_OPTION (pfile, warn_literal_suffix) = 1;
 
   /* Default CPP arithmetic to something sensible for the host for the
      benefit of dumb users like fix-header.  */
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 9d23002..edab996 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1553,14 +1553,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 
   if (CPP_OPTION (pfile, user_literals))
     {
+      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
+	 underscore is ill-formed.  Since this breaks programs using macros
+	 from inttypes.h, we generate a warning and treat the ud-suffix as a
+	 separate preprocessing token.  This approach is under discussion by
+	 the standards committee, and has been adopted as a conforming
+	 extension by other front ends such as clang.  */
+      if (ISALPHA(*cur))
+	{
+	  /* Warn, but leave the suffix to be lexed as a separate token.  */
+	  if (CPP_OPTION (pfile, warn_literal_suffix))
+	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and identifier");
+	}
       /* Grab user defined literal suffix.  */
-      if (ISIDST (*cur))
+      else if (*cur == '_')
 	{
 	  type = cpp_userdef_string_add_type (type);
 	  ++cur;
+
+	  while (ISIDNUM (*cur))
+	    ++cur;
 	}
-      while (ISIDNUM (*cur))
-	++cur;
     }
 
   pfile->buffer->cur = cur;
@@ -1668,15 +1684,31 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 
   if (CPP_OPTION (pfile, user_literals))
     {
+      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
+	 underscore is ill-formed.  Since this breaks programs using macros
+	 from inttypes.h, we generate a warning and treat the ud-suffix as a
+	 separate preprocessing token.  This approach is under discussion by
+	 the standards committee, and has been adopted as a conforming
+	 extension by other front ends such as clang.  */
+      if (ISALPHA(*cur))
+	{
+	  /* Warn, but leave the suffix to be lexed as a separate token.  */
+	  if (CPP_OPTION (pfile, warn_literal_suffix))
+	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
+				   token->src_loc, 0,
+				   "invalid suffix on literal; C++11 requires "
+				   "a space between literal and identifier");
+	}
       /* Grab user defined literal suffix.  */
-      if (ISIDST (*cur))
+      else if (*cur == '_')
 	{
 	  type = cpp_userdef_char_add_type (type);
 	  type = cpp_userdef_string_add_type (type);
           ++cur;
+
+	  while (ISIDNUM (*cur))
+	    ++cur;
 	}
-      while (ISIDNUM (*cur))
-	++cur;
     }
 
   pfile->buffer->cur = cur;


More information about the Gcc-patches mailing list