[RFC] New pragma exec_charset

Andreas Krebbel krebbel@linux.vnet.ibm.com
Thu Oct 19 15:50:00 GMT 2017


The TPF operating system uses the GCC S/390 backend.  TPF builds set an
EBCDIC exec charset for compilation using -fexec-charset.  However,
certain libraries require ASCII strings instead.  In order to be able
to put calls to such libraries into the normal code, it must be possible
to switch the exec charset within a compilation unit.

This patch is an attempt to implement that by adding a new pragma, which
could be used as in the following example:

int
foo ()
{
  call_with_utf8("hello world");

#pragma GCC exec_charset("UTF16")
  call_with_utf16("hello world");

#pragma GCC exec_charset(pop)
  call_with_utf8("hello world");
}

Does this look reasonable?

Bye,

-Andreas-
---
 gcc/c-family/c-pragma.c                      | 50 ++++++++++++++++++++++++++++
 gcc/doc/extend.texi                          | 26 +++++++++++++++
 gcc/testsuite/gcc.dg/pragma-exec_charset-1.c | 26 +++++++++++++++
 libcpp/charset.c                             |  2 +-
 libcpp/include/cpplib.h                      |  3 ++
 libcpp/init.c                                |  2 +-
 libcpp/internal.h                            |  1 -
 7 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pragma-exec_charset-1.c

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index f7b59b3..db281b9 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -34,6 +34,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "plugin.h"
 
+extern cpp_options *cpp_opts;
+
 #define GCC_BAD(gmsgid) \
   do { warning (OPT_Wpragmas, gmsgid); return; } while (0)
 #define GCC_BAD2(gmsgid, arg) \
@@ -1141,6 +1143,52 @@ handle_pragma_message (cpp_reader *ARG_UNUSED(dummy))
     inform (input_location, "#pragma message: %s", TREE_STRING_POINTER (message));
 }
 
+/* Handle #pragma GCC exec_charset ("NAME") and #pragma GCC exec_charset
+   (pop): update cpp_opts->narrow_charset and set up translation again.  */
+static void
+handle_pragma_exec_charset (cpp_reader *ARG_UNUSED(dummy))
+{
+  /* Charset that was active before the last switch; one level only.  */
+  static const char *previous_charset = NULL;
+  const char *new_charset, *new_previous;
+  enum cpp_ttype token;
+  tree x;
+
+  if (pragma_lex (&x) != CPP_OPEN_PAREN)
+    GCC_BAD ("expected a string after %<#pragma exec_charset%>");
+
+  token = pragma_lex (&x);
+  if (token == CPP_STRING)
+    {
+      /* NOTE(review): this points into the string tree node; confirm it
+	 stays live for as long as the option refers to it.  */
+      new_charset = TREE_STRING_POINTER (x);
+      new_previous = cpp_opts->narrow_charset;
+    }
+  /* Compare the whole identifier so that e.g. "popcorn" is rejected.  */
+  else if (token == CPP_NAME && strcmp (IDENTIFIER_POINTER (x), "pop") == 0)
+    {
+      if (previous_charset == NULL)
+	GCC_BAD ("pop without previous exec_charset use - ignored");
+      new_charset = previous_charset;
+      new_previous = NULL;
+    }
+  else
+    GCC_BAD ("expected a charset string or pop after %<#pragma exec_charset%>");
+
+  /* Finish parsing before committing, so a malformed pragma is ignored.  */
+  if (pragma_lex (&x) != CPP_CLOSE_PAREN)
+    GCC_BAD ("malformed %<#pragma exec_charset%>, ignored");
+  if (pragma_lex (&x) != CPP_EOF)
+    warning (OPT_Wpragmas, "junk at end of %<#pragma exec_charset%>");
+
+  previous_charset = new_previous;
+  cpp_opts->narrow_charset = new_charset;
+  inform (input_location, "switching to exec charset: %s", new_charset);
+  cpp_destroy_iconv (parse_in);
+  cpp_init_iconv (parse_in);
+}
+
 /* Mark whether the current location is valid for a STDC pragma.  */
 
 static bool valid_location_for_stdc_pragma;
@@ -1571,6 +1619,8 @@ init_pragma (void)
 				    handle_pragma_redefine_extname);
 
   c_register_pragma_with_expansion (0, "message", handle_pragma_message);
+  c_register_pragma_with_expansion ("GCC", "exec_charset",
+				    handle_pragma_exec_charset);
 
 #ifdef REGISTER_TARGET_PRAGMAS
   REGISTER_TARGET_PRAGMAS ();
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d9b7a54..b67993a 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21611,6 +21611,7 @@ for further explanation.
 * Push/Pop Macro Pragmas::
 * Function Specific Option Pragmas::
 * Loop-Specific Pragmas::
+* Charset-Specific Pragmas::
 @end menu
 
 @node AArch64 Pragmas
@@ -22209,6 +22210,31 @@ void ignore_vec_dep (int *a, int k, int c, int m)
 @}
 @end smallexample
 
+@node Charset-Specific Pragmas
+@subsection Charset-Specific Pragmas
+
+@table @code
+@item #pragma GCC exec_charset(@var{"charset"})
+@cindex pragma GCC exec_charset
+
+Set the execution character set, used for string and character
+constants.  The default is the exec charset specified with
+@option{-fexec-charset}, or UTF-8 if @option{-fexec-charset} isn't used.
+@var{charset} can be any encoding supported by the system's @code{iconv}
+library routine.  The special value @code{pop} (without quotes) can be
+used to switch back to the exec charset that was in effect before the
+last @code{#pragma GCC exec_charset} setting.
+@end table
+
+@smallexample
+call_with_utf8("hello world");
+
+#pragma GCC exec_charset("UTF16")
+call_with_utf16("hello world");
+
+#pragma GCC exec_charset(pop)
+call_with_utf8("hello world");
+@end smallexample
 
 @node Unnamed Fields
 @section Unnamed Structure and Union Fields
diff --git a/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c
new file mode 100644
index 0000000..5c695aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+
+char t1[] = "hello world";
+#pragma GCC exec_charset("EBCDIC-US")
+char t2[] = "hello world";
+#pragma GCC exec_charset(pop)
+char t3[] = "hello world";
+
+char hello_world_utf8[12] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x00 };
+char hello_world_ebcdic[12] = { 0x88, 0x85, 0x93, 0x93, 0x96, 0x40, 0xa6, 0x96, 0x99, 0x93, 0x84, 0x00 };
+
+int
+main ()
+{
+  /* t1 and t3 are defined outside the pragma region, t2 inside it.  */
+  int before_ok = __builtin_memcmp (t1, hello_world_utf8, 12) == 0;
+  int inside_ok = __builtin_memcmp (t2, hello_world_ebcdic, 12) == 0;
+  int after_ok = __builtin_memcmp (t3, hello_world_utf8, 12) == 0;
+
+  if (!before_ok || !inside_ok || !after_ok)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 6a3bbbc..47fa406 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -737,7 +737,7 @@ cpp_init_iconv (cpp_reader *pfile)
 
 /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary.  */
 void
-_cpp_destroy_iconv (cpp_reader *pfile)
+cpp_destroy_iconv (cpp_reader *pfile)
 {
   if (HAVE_ICONV)
     {
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 804132a..acbdf5a 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -861,6 +861,9 @@ extern void cpp_post_options (cpp_reader *);
 /* Set up translation to the target character set.  */
 extern void cpp_init_iconv (cpp_reader *);
 
+/* Cleanup translation to the target character set.  */
+extern void cpp_destroy_iconv (cpp_reader *);
+
 /* Call this to finish preprocessing.  If you requested dependency
    generation, pass an open stream to write the information to,
    otherwise NULL.  It is your responsibility to close the stream.  */
diff --git a/libcpp/init.c b/libcpp/init.c
index 16ff202..4e68645 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -314,7 +314,7 @@ cpp_destroy (cpp_reader *pfile)
 
   _cpp_destroy_hashtable (pfile);
   _cpp_cleanup_files (pfile);
-  _cpp_destroy_iconv (pfile);
+  cpp_destroy_iconv (pfile);
 
   _cpp_free_buff (pfile->a_buff);
   _cpp_free_buff (pfile->u_buff);
diff --git a/libcpp/internal.h b/libcpp/internal.h
index f24e85c..ce2d902 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -757,7 +757,6 @@ extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
 			    cppchar_t *,
 			    source_range *char_range,
 			    cpp_string_location_reader *loc_reader);
-extern void _cpp_destroy_iconv (cpp_reader *);
 extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
 					  unsigned char *, size_t, size_t,
 					  const unsigned char **, off_t *);
-- 
2.9.1



More information about the Gcc-patches mailing list