[RFC] New pragma exec_charset
Andreas Krebbel
krebbel@linux.vnet.ibm.com
Thu Oct 19 15:50:00 GMT 2017
The TPF operating system uses the GCC S/390 backend. TPF builds set an
EBCDIC exec charset for compilation using -fexec-charset. However,
certain libraries require ASCII strings instead. To be able to place
calls to such a library in otherwise normal code, it must be possible
to switch the exec charset within a compilation unit.
This is an attempt to implement that by adding a new pragma, which
could be used as in the following example:
int
foo ()
{
call_with_utf8("hello world");
#pragma GCC exec_charset("UTF16")
call_with_utf16("hello world");
#pragma GCC exec_charset(pop)
call_with_utf8("hello world");
}
Does this look reasonable?
Bye,
-Andreas-
---
gcc/c-family/c-pragma.c | 50 ++++++++++++++++++++++++++++
gcc/doc/extend.texi | 26 +++++++++++++++
gcc/testsuite/gcc.dg/pragma-exec_charset-1.c | 26 +++++++++++++++
libcpp/charset.c | 2 +-
libcpp/include/cpplib.h | 3 ++
libcpp/init.c | 2 +-
libcpp/internal.h | 1 -
7 files changed, 107 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pragma-exec_charset-1.c
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index f7b59b3..db281b9 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -34,6 +34,8 @@ along with GCC; see the file COPYING3. If not see
#include "opts.h"
#include "plugin.h"
+extern cpp_options *cpp_opts;
+
#define GCC_BAD(gmsgid) \
do { warning (OPT_Wpragmas, gmsgid); return; } while (0)
#define GCC_BAD2(gmsgid, arg) \
@@ -1141,6 +1143,52 @@ handle_pragma_message (cpp_reader *ARG_UNUSED(dummy))
inform (input_location, "#pragma message: %s", TREE_STRING_POINTER (message));
}
+static void
+handle_pragma_exec_charset (cpp_reader *ARG_UNUSED(dummy))
+{
+ enum cpp_ttype token;
+ tree x;
+ static const char* previous_charset = NULL;
+
+ token = pragma_lex (&x);
+ if (token == CPP_OPEN_PAREN)
+ {
+ token = pragma_lex (&x);
+ if (token == CPP_STRING)
+ {
+ previous_charset = cpp_opts->narrow_charset;
+ cpp_opts->narrow_charset = TREE_STRING_POINTER (x);
+ }
+ else if (token == CPP_NAME
+ && strncmp (IDENTIFIER_POINTER (x), "pop", 3) == 0)
+ {
+ if (previous_charset == NULL)
+ {
+ warning (OPT_Wpragmas,
+ "pop without previous exec_charset use - ignored");
+ return;
+ }
+ cpp_opts->narrow_charset = previous_charset;
+ previous_charset = NULL;
+ }
+ else
+ GCC_BAD ("expected a charset string or pop after %<#pragma exec_charset%>");
+
+ if (pragma_lex (&x) != CPP_CLOSE_PAREN)
+ GCC_BAD ("malformed %<#pragma exec_charset%>, ignored");
+ }
+ else
+ GCC_BAD ("expected a string after %<#pragma exec_charset%>");
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of %<#pragma exec_charset%>");
+
+ inform (input_location, "switching to exec charset: %s",
+ cpp_opts->narrow_charset);
+ cpp_destroy_iconv (parse_in);
+ cpp_init_iconv (parse_in);
+}
+
/* Mark whether the current location is valid for a STDC pragma. */
static bool valid_location_for_stdc_pragma;
@@ -1571,6 +1619,8 @@ init_pragma (void)
handle_pragma_redefine_extname);
c_register_pragma_with_expansion (0, "message", handle_pragma_message);
+ c_register_pragma_with_expansion ("GCC", "exec_charset",
+ handle_pragma_exec_charset);
#ifdef REGISTER_TARGET_PRAGMAS
REGISTER_TARGET_PRAGMAS ();
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d9b7a54..b67993a 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21611,6 +21611,7 @@ for further explanation.
* Push/Pop Macro Pragmas::
* Function Specific Option Pragmas::
* Loop-Specific Pragmas::
+* Charset-Specific Pragmas::
@end menu
@node AArch64 Pragmas
@@ -22209,6 +22210,31 @@ void ignore_vec_dep (int *a, int k, int c, int m)
@}
@end smallexample
+@node Charset-Specific Pragmas
+@subsection Charset-Specific Pragmas
+
+@table @code
+@item #pragma GCC exec_charset(@var{"charset"})
+@cindex pragma GCC exec_charset
+
+Set the execution character set, used for string and character
+constants. The default is the exec charset specified with
+@option{-fexec-charset} or UTF-8 if @option{-fexec-charset} isn't used.
+@var{charset} can be any encoding supported by the system's
+@code{iconv} library routine. The special value @code{pop} (without
+quotes) switches back to the exec charset that was in effect before
+the last @code{#pragma GCC exec_charset} setting.
+@end table
+
+@smallexample
+call_with_utf8("hello world");
+
+#pragma GCC exec_charset("UTF16")
+call_with_utf16("hello world");
+
+#pragma GCC exec_charset(pop)
+call_with_utf8("hello world");
+@end smallexample
@node Unnamed Fields
@section Unnamed Structure and Union Fields
diff --git a/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c
new file mode 100644
index 0000000..5c695aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pragma-exec_charset-1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+
+char t1[] = "hello world";
+#pragma GCC exec_charset("EBCDIC-US")
+char t2[] = "hello world";
+#pragma GCC exec_charset(pop)
+char t3[] = "hello world";
+
+char hello_world_utf8[12] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x00 };
+char hello_world_ebcdic[12] = { 0x88, 0x85, 0x93, 0x93, 0x96, 0x40, 0xa6, 0x96, 0x99, 0x93, 0x84, 0x00 };
+
+int
+main ()
+{
+ if (__builtin_memcmp (t1, hello_world_utf8, 12) != 0)
+ __builtin_abort ();
+
+ if (__builtin_memcmp (t2, hello_world_ebcdic, 12) != 0)
+ __builtin_abort ();
+
+ if (__builtin_memcmp (t3, hello_world_utf8, 12) != 0)
+ __builtin_abort ();
+
+}
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 6a3bbbc..47fa406 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -737,7 +737,7 @@ cpp_init_iconv (cpp_reader *pfile)
/* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */
void
-_cpp_destroy_iconv (cpp_reader *pfile)
+cpp_destroy_iconv (cpp_reader *pfile)
{
if (HAVE_ICONV)
{
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 804132a..acbdf5a 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -861,6 +861,9 @@ extern void cpp_post_options (cpp_reader *);
/* Set up translation to the target character set. */
extern void cpp_init_iconv (cpp_reader *);
+/* Cleanup translation to the target character set. */
+extern void cpp_destroy_iconv (cpp_reader *);
+
/* Call this to finish preprocessing. If you requested dependency
generation, pass an open stream to write the information to,
otherwise NULL. It is your responsibility to close the stream. */
diff --git a/libcpp/init.c b/libcpp/init.c
index 16ff202..4e68645 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -314,7 +314,7 @@ cpp_destroy (cpp_reader *pfile)
_cpp_destroy_hashtable (pfile);
_cpp_cleanup_files (pfile);
- _cpp_destroy_iconv (pfile);
+ cpp_destroy_iconv (pfile);
_cpp_free_buff (pfile->a_buff);
_cpp_free_buff (pfile->u_buff);
diff --git a/libcpp/internal.h b/libcpp/internal.h
index f24e85c..ce2d902 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -757,7 +757,6 @@ extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
cppchar_t *,
source_range *char_range,
cpp_string_location_reader *loc_reader);
-extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,
const unsigned char **, off_t *);
--
2.9.1
More information about the Gcc-patches
mailing list