[google/integration] Extend C++11 UDLs to be compatible with inttypes.h macros (issue6104051)

Jeffrey Yasskin jyasskin@google.com
Mon Apr 23 03:11:00 GMT 2012


Could you try to get this into mainline instead of just the google
branches? In http://gcc.gnu.org/PR52538, Jonathan sounded like he'd
consider accepting it.

On Sun, Apr 22, 2012 at 7:54 PM, Ollie Wild <aaw@google.com> wrote:
> Add new option, -Wreserved-user-defined-literal.
>
> This option, which is enabled by default, causes the preprocessor to warn
> when a string or character literal is followed by a ud-suffix which does
> not begin with an underscore.  According to [lex.ext]p10, this is
> ill-formed.
>
> Also modifies the preprocessor to treat such ill-formed suffixes as separate
> preprocessing tokens.  This is consistent with the Clang front end (see
> http://llvm.org/viewvc/llvm-project?view=rev&revision=152287), and enables
> backwards compatibility with code that uses formatting macros from
> <inttypes.h>, as in the following code block:
>
>  int main() {
>    int64_t i64 = 123;
>    printf("My int64: %"PRId64"\n", i64);
>  }
>
> Google ref b/6377711.
>
> 2012-04-22   Ollie Wild  <aaw@google.com>
>
>        * gcc/c-family/c-common.c:
>        * gcc/c-family/c-opts.c (c_common_handle_option):
>        * gcc/c-family/c.opt:
>        * gcc/doc/invoke.texi (struct A):
>        * gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C (test):
>        (main):
>        * libcpp/include/cpplib.h (struct cpp_options):
>        * libcpp/init.c (cpp_create_reader):
>        * libcpp/lex.c (lex_raw_string):
>        (lex_string):
>
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index 1d19251..915dc25 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -8724,6 +8724,7 @@ static const struct reason_option_codes_t option_codes[] = {
>   {CPP_W_NORMALIZE,                    OPT_Wnormalized_},
>   {CPP_W_INVALID_PCH,                  OPT_Winvalid_pch},
>   {CPP_W_WARNING_DIRECTIVE,            OPT_Wcpp},
> +  {CPP_W_RESERVED_USER_LITERALS,       OPT_Wreserved_user_defined_literal},
>   {CPP_W_NONE,                         0}
>  };
>
> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> index 5118928..dab6ce5 100644
> --- a/gcc/c-family/c-opts.c
> +++ b/gcc/c-family/c-opts.c
> @@ -509,6 +509,10 @@ c_common_handle_option (size_t scode, const char *arg, int value,
>          break;
>        }
>
> +    case OPT_Wreserved_user_defined_literal:
> +      cpp_opts->warn_reserved_user_literals = value;
> +      break;
> +
>     case OPT_Wreturn_type:
>       warn_return_type = value;
>       break;
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index 40ff96c..f610513 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -589,6 +589,10 @@ Wreorder
>  C++ ObjC++ Var(warn_reorder) Warning
>  Warn when the compiler reorders code
>
> +Wreserved-user-defined-literal
> +C++ ObjC++ Warning
> +Warn when a string or character literal is followed by a ud-suffix which does not begin with an underscore.
> +
>  Wreturn-type
>  C ObjC C++ ObjC++ Var(warn_return_type) Warning
>  Warn whenever a function's return type defaults to \"int\" (C), or about inconsistent return types (C++)
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 1b61e76..d425079 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -198,7 +198,7 @@ in the following sections.
>  -fvisibility-ms-compat @gol
>  -Wabi  -Wconversion-null  -Wctor-dtor-privacy @gol
>  -Wdelete-non-virtual-dtor -Wnarrowing -Wnoexcept @gol
> --Wnon-virtual-dtor  -Wreorder @gol
> +-Wnon-virtual-dtor  -Wreorder -Wreserved-user-defined-literal @gol
>  -Weffc++  -Wstrict-null-sentinel @gol
>  -Wno-non-template-friend  -Wold-style-cast @gol
>  -Woverloaded-virtual  -Wno-pmf-conversions @gol
> @@ -2474,6 +2474,30 @@ struct A @{
>  The compiler will rearrange the member initializers for @samp{i}
>  and @samp{j} to match the declaration order of the members, emitting
>  a warning to that effect.  This warning is enabled by @option{-Wall}.
> +
> +@item -Wreserved-user-defined-literal @r{(C++ and Objective-C++ only)}
> +@opindex Wreserved-user-defined-literal
> +@opindex Wno-reserved-user-defined-literal
> +Warn when a string or character literal is followed by a ud-suffix which does
> +not begin with an underscore.  As a conforming extension, GCC treats such
> +suffixes as separate preprocessing tokens in order to maintain backwards
> +compatibility with code that uses formatting macros from @code{<inttypes.h>}.
> +For example:
> +
> +@smallexample
> +#define __STDC_FORMAT_MACROS
> +#include <inttypes.h>
> +#include <stdio.h>
> +
> +int main() @{
> +  int64_t i64 = 123;
> +  printf("My int64: %"PRId64"\n", i64);
> +@}
> +@end smallexample
> +
> +In this case, @code{PRId64} is treated as a separate preprocessing token.
> +
> +This warning is enabled by default.
>  @end table
>
>  The following @option{-W@dots{}} options are not affected by @option{-Wall}.
> diff --git a/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> new file mode 100644
> index 0000000..66de5c0
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/Wreserved-user-defined-literal.C
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-options "-std=c++0x" }
> +
> +// Make sure -Wreserved-user-defined-literal is enabled by default and
> +// triggers as expected.
> +
> +#define BAR "bar"
> +#define PLUS_ONE + 1
> +
> +#include <cstdint>
> +#include <cassert>
> +
> +
> +void
> +test()
> +{
> +  char c = '3'PLUS_ONE;          // { dg-warning "invalid suffix on literal" }
> +  char s[] = "foo"BAR;   // { dg-warning "invalid suffix on literal" }
> +
> +  assert(c == '4');
> +  assert(s[3] != '\0');
> +  assert(s[3] == 'b');
> +}
> +
> +int
> +main()
> +{
> +  test();
> +}
> diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
> index bf59d01..84cacb0 100644
> --- a/libcpp/include/cpplib.h
> +++ b/libcpp/include/cpplib.h
> @@ -427,6 +427,10 @@ struct cpp_options
>   /* Nonzero for C++ 2011 Standard user-defnied literals.  */
>   unsigned char user_literals;
>
> +  /* Nonzero means warn when a string or character literal is followed by a
> +     ud-suffix which does not beging with an underscore.  */
> +  unsigned char warn_reserved_user_literals;
> +
>   /* Holds the name of the target (execution) character set.  */
>   const char *narrow_charset;
>
> @@ -906,7 +910,8 @@ enum {
>   CPP_W_CXX_OPERATOR_NAMES,
>   CPP_W_NORMALIZE,
>   CPP_W_INVALID_PCH,
> -  CPP_W_WARNING_DIRECTIVE
> +  CPP_W_WARNING_DIRECTIVE,
> +  CPP_W_RESERVED_USER_LITERALS
>  };
>
>  /* Output a diagnostic of some kind.  */
> diff --git a/libcpp/init.c b/libcpp/init.c
> index 5fa82ca..2688699 100644
> --- a/libcpp/init.c
> +++ b/libcpp/init.c
> @@ -175,6 +175,7 @@ cpp_create_reader (enum c_lang lang, hash_table *table,
>   CPP_OPTION (pfile, warn_variadic_macros) = 1;
>   CPP_OPTION (pfile, warn_builtin_macro_redefined) = 1;
>   CPP_OPTION (pfile, warn_normalize) = normalized_C;
> +  CPP_OPTION (pfile, warn_reserved_user_literals) = 1;
>
>   /* Default CPP arithmetic to something sensible for the host for the
>      benefit of dumb users like fix-header.  */
> diff --git a/libcpp/lex.c b/libcpp/lex.c
> index 0ad9660..ba8ed9e 100644
> --- a/libcpp/lex.c
> +++ b/libcpp/lex.c
> @@ -1491,14 +1491,30 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
>
>   if (CPP_OPTION (pfile, user_literals))
>     {
> +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
> +        underscore is ill-formed.  Since this breaks programs using macros
> +        from inttypes.h, we generate a warning and treat the ud-suffix as a
> +        separate preprocessing token.  This approach is under discussion by
> +        the standards committee, and has been adopted as a conforming
> +        extension by other front ends such as clang. */
> +      if (ISALPHA(*cur))
> +       {
> +         // Raise a warning, but do not consume subsequent tokens.
> +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
> +                                  token->src_loc, 0,
> +                                  "invalid suffix on literal; C++11 requires "
> +                                  "a space between literal and identifier");
> +       }
>       /* Grab user defined literal suffix.  */
> -      if (ISIDST (*cur))
> +      else if (*cur == '_')
>        {
>          type = cpp_userdef_string_add_type (type);
>          ++cur;
> +
> +         while (ISIDNUM (*cur))
> +           ++cur;
>        }
> -      while (ISIDNUM (*cur))
> -       ++cur;
>     }
>
>   pfile->buffer->cur = cur;
> @@ -1606,15 +1622,31 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
>
>   if (CPP_OPTION (pfile, user_literals))
>     {
> +      /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
> +        underscore is ill-formed.  Since this breaks programs using macros
> +        from inttypes.h, we generate a warning and treat the ud-suffix as a
> +        separate preprocessing token.  This approach is under discussion by
> +        the standards committee, and has been adopted as a conforming
> +        extension by other front ends such as clang. */
> +      if (ISALPHA(*cur))
> +       {
> +         // Raise a warning, but do not consume subsequent tokens.
> +         if (CPP_OPTION (pfile, warn_reserved_user_literals))
> +           cpp_warning_with_line (pfile, CPP_W_RESERVED_USER_LITERALS,
> +                                  token->src_loc, 0,
> +                                  "invalid suffix on literal; C++11 requires "
> +                                  "a space between literal and identifier");
> +       }
>       /* Grab user defined literal suffix.  */
> -      if (ISIDST (*cur))
> +      else if (*cur == '_')
>        {
>          type = cpp_userdef_char_add_type (type);
>          type = cpp_userdef_string_add_type (type);
>           ++cur;
> +
> +         while (ISIDNUM (*cur))
> +           ++cur;
>        }
> -      while (ISIDNUM (*cur))
> -       ++cur;
>     }
>
>   pfile->buffer->cur = cur;
>
> --
> This patch is available for review at http://codereview.appspot.com/6104051



More information about the Gcc-patches mailing list