This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: fixincludes: yet another grab bag
- To: gcc-patches at gcc dot gnu dot org
- Subject: Re: fixincludes: yet another grab bag
- From: Zack Weinberg <zack at wolery dot cumb dot org>
- Date: Tue, 18 Jan 2000 13:40:46 -0800
- Cc: bkorb at sco dot com
- References: <20000118102755.A19785@wolery.cumb.org>
Here's the revised patch. Thanks to all who commented. Changes from
previous version:
- style and speed fixes, else_endif_label
- don't munge __mips and the like
zw
* fixfixes.c (fix_char_macro_uses, fix_char_macro_defines):
New functions.
(IO_use, IO_defn, CTRL_use, CTRL_defn): New fixes.
* inclhack.def: Replace io_def_quotes, ioctl_fix_ctrl, and
bsd43_io_macros with new hacks using the above C fixes.
* fixlib.c (is_cxx_header): Use a regexp for the file-contents
search. Recognize emacs mode markers.
* fixtests.c (else_endif_label): Disentangle spaghetti code.
Fix bug causing C++ comments in C++ headers to be rejected.
* inclhack.def (avoid_bool): Add select and bypass
expressions.
(math_exception): Escape '+' in regular expression.
(machine_name): Don't munge __mips, __i386, and the like; only
mips, _mips, i386, _i386, etc.
===================================================================
Index: fixfixes.c
--- fixfixes.c 2000/01/17 21:45:28 1.6
+++ fixfixes.c 2000/01/18 21:31:03
@@ -73,9 +73,12 @@ typedef struct {
#define FIXUP_TABLE \
_FT_( "no_double_slash", double_slash_fix ) \
- _FT_( "else_endif_label", else_endif_label_fix )
+ _FT_( "else_endif_label", else_endif_label_fix ) \
+ _FT_( "IO_use", IO_use_fix ) \
+ _FT_( "CTRL_use", CTRL_use_fix) \
+ _FT_( "IO_defn", IO_defn_fix ) \
+ _FT_( "CTRL_defn", CTRL_defn_fix )
-
#define FIX_PROC_HEAD( fix ) \
static void fix ( filname, text ) \
const char* filname; \
@@ -352,6 +355,195 @@ FIX_PROC_HEAD( else_endif_label_fix )
} /* for (entire file) loop */
return;
+}
+
+/* Scan TEXT (the NUL-terminated contents of a header) for lines like this:
+
+ #define TIOCCONS _IO(T, 12)
+
+ and write TEXT to stdout with each such line changed to read like this:
+
+ #define TIOCCONS _IO('T', 12)
+
+ which is the required syntax per the C standard. (The definition of
+ _IO also has to be tweaked - see fix_char_macro_defines.) STR is the
+ macro-name fragment to look for ("IO" here); any prefix or uppercase suffix is accepted. */
+void
+fix_char_macro_uses (text, str)
+ const char *text;
+ const char *str;
+{
+ /* This regexp looks for a traditional-syntax #define (# in column 1)
+ of an object-like macro. Names may contain '_' (e.g. BSD43_TIOCFOO). */
+ static const char pat[] =
+ "^#[ \t]*define[ \t]+[_A-Za-z][_A-Za-z0-9]*[ \t]+";
+ static regex_t re;
+
+ regmatch_t rm[1];
+ const char *p, *limit;
+ size_t len = strlen (str);
+
+ compile_re (pat, &re, 1, "macro pattern", "fix_char_macro_uses");
+
+ for (p = text;
+ regexec (&re, p, 1, rm, 0) == 0;
+ p = limit + 1)
+ {
+ /* p + rm[0].rm_eo is the first character of the macro replacement.
+ Find the end of the macro replacement (LIMIT, the first newline not
+ preceded by a backslash), and STR within the replacement. */
+ p += rm[0].rm_eo;
+ limit = p - 1;
+ do
+ {
+ limit = strchr (limit + 1, '\n');
+ if (!limit)
+ goto done;
+ }
+ while (limit[-1] == '\\');
+
+ do
+ {
+ if (*p == str[0] && !strncmp (p+1, str+1, len-1))
+ goto found;
+ }
+ while (++p < limit - len);
+ /* Hit end of line. */
+ continue;
+
+ found:
+ /* Found STR on this line. If the macro needs fixing,
+ the next few chars will be whitespace or uppercase,
+ then an open paren, then a single letter. */
+ while (p < limit && (isspace ((unsigned char) *p) || isupper ((unsigned char) *p))) p++;
+ if (*p++ != '(')
+ continue;
+ if (!isalpha ((unsigned char) *p))
+ continue;
+ if (isalnum ((unsigned char) p[1]) || p[1] == '_')
+ continue;
+
+ /* Splat all preceding text into the output buffer,
+ quote the character at p, then proceed. */
+ fwrite (text, 1, p - text, stdout);
+ putchar ('\'');
+ putchar (*p);
+ putchar ('\'');
+ text = p + 1;
+ }
+ done:
+ fputs (text, stdout);
+}
+
+/* Scan TEXT (the NUL-terminated contents of a header) for lines like this:
+
+ #define _IO(x, y) ('x'<<16+y)
+
+ and write TEXT to stdout with each such line changed to read like this:
+
+ #define _IO(x, y) (x<<16+y)
+
+ which is the required syntax per the C standard. (The uses of _IO
+ also have to be tweaked - see fix_char_macro_uses.) STR is the fragment
+ to find in the macro name ("IO" here); only a one-letter first parameter is unquoted. */
+void
+fix_char_macro_defines (text, str)
+ const char *text;
+ const char *str;
+{
+ /* This regexp looks for any traditional-syntax #define (# in column 1). */
+ static const char pat[] =
+ "^#[ \t]*define[ \t]+";
+ static regex_t re;
+
+ regmatch_t rm[1];
+ const char *p, *limit;
+ size_t len = strlen (str);
+ char arg;
+
+ compile_re (pat, &re, 1, "macro pattern", "fix_char_macro_defines");
+
+ for (p = text;
+ regexec (&re, p, 1, rm, 0) == 0;
+ p = limit + 1)
+ {
+ /* p + rm[0].rm_eo is the first character of the macro name.
+ Find the end of the macro replacement (LIMIT, the first newline not
+ preceded by a backslash), and STR within the name. */
+ p += rm[0].rm_eo;
+ limit = p - 1;
+ do
+ {
+ limit = strchr (limit + 1, '\n');
+ if (!limit)
+ goto done;
+ }
+ while (limit[-1] == '\\');
+
+ do
+ {
+ if (*p == str[0] && !strncmp (p+1, str+1, len-1))
+ goto found;
+ }
+ while (++p < limit && (isalnum ((unsigned char) *p) || *p == '_'));
+ /* P stepped past the end of the macro name without finding STR. */
+ continue;
+
+ found:
+ /* Found STR in this macro name. If the macro needs fixing,
+ there may be a few uppercase letters, then there will be an
+ open paren with _no_ intervening whitespace, and then a
+ single letter. */
+ while (p < limit && isupper ((unsigned char) *p)) p++;
+ if (*p++ != '(')
+ continue;
+ if (!isalpha ((unsigned char) *p))
+ continue;
+ if (isalnum ((unsigned char) p[1]) || p[1] == '_')
+ continue;
+
+ /* The character at P is the one to look for in the following
+ text. */
+ arg = *p;
+ p += 2;
+
+ while (p < limit)
+ {
+ if (p[-1] == '\'' && p[0] == arg && p[1] == '\'')
+ {
+ /* Remove the quotes from this use of ARG. */
+ p--;
+ fwrite (text, 1, p - text, stdout);
+ putchar (arg);
+ p += 3;
+ text = p;
+ }
+ else
+ p++;
+ }
+ }
+ done:
+ fputs (text, stdout);
+}
+
+/* Prefixed variants (_IO, DESIO, BSD43__IO, _CTRL, ...) need no special handling: the
+ fixers look for the string anywhere in the macro name or replacement, not only at its start. */
+FIX_PROC_HEAD( IO_use_fix )
+{
+ fix_char_macro_uses (text, "IO");
+}
+FIX_PROC_HEAD( CTRL_use_fix )
+{
+ fix_char_macro_uses (text, "CTRL");
+}
+
+FIX_PROC_HEAD( IO_defn_fix )
+{
+ fix_char_macro_defines (text, "IO");
+}
+FIX_PROC_HEAD( CTRL_defn_fix )
+{
+ fix_char_macro_defines (text, "CTRL");
}
/* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
===================================================================
Index: fixlib.c
--- fixlib.c 2000/01/17 21:45:29 1.5
+++ fixlib.c 2000/01/18 21:31:04
@@ -112,43 +112,27 @@ is_cxx_header (fname, text)
}
} not_cxx_name:;
- /* Or it might contain the phrase 'extern "C++"' */
- for (;;)
+ /* Or it might contain one of several phrases which indicate C++ code.
+ Currently recognized are:
+ extern "C++"
+ -*- (Mode: )? C++ -*- (emacs mode marker)
+ template <
+ */
{
- tSCC zExtern[] = "extern";
- tSCC zExtCxx[] = "\"C++\"";
- tSCC zTemplate[] = "template";
+ tSCC cxxpat[] = "\
+extern[ \t]*\"C\\+\\+\"|\
+-\\*-[ \t]*([mM]ode:[ \t]*)?[cC]\\+\\+[; \t]*-\\*-|\
+template[ \t]*<";
+ static regex_t cxxre;
+ static int compiled;
- switch (*(text++))
- {
- case 'e':
- /* Check for "extern \"C++\"" */
- if (strncmp (text, zExtern+1, sizeof( zExtern )-2) != 0)
- break;
- text += sizeof( zExtern )-2;
- if (! isspace( *(text++)) )
- break;
- while (isspace( *text )) text++;
- if (strncmp (text, zExtCxx, sizeof (zExtCxx) -1) == 0)
- return BOOL_TRUE;
- break;
+ if (!compiled)
+ compile_re (cxxpat, &cxxre, 0, "contents check", "is_cxx_header");
- case 't':
- /* Check for "template<" */
- if (strncmp (text, zTemplate+1, sizeof( zTemplate )-2) != 0)
- break;
- text += sizeof( zTemplate )-2;
- while (isspace( *text )) text++;
- if (*text == '<')
- return BOOL_TRUE;
- break;
-
- case NUL:
- goto text_done;
- break;
- }
- } text_done:;
-
+ if (regexec (&cxxre, text, 0, 0, 0) == 0)
+ return BOOL_TRUE;
+ }
+
return BOOL_FALSE;
}
===================================================================
Index: fixtests.c
--- fixtests.c 2000/01/17 21:45:29 1.8
+++ fixtests.c 2000/01/18 21:31:04
@@ -153,9 +153,9 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
static regex_t label_re;
char ch;
- tCC* pz_next = (char*)NULL;
+ tCC* pz_next;
+ tCC* all_text = text;
regmatch_t match[2];
- t_bool file_is_cxx = is_cxx_header( fname, text );
/*
This routine may be run many times within a single execution.
@@ -170,21 +170,15 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
for (;;) /* entire file */
{
- /*
- See if we need to advance to the next candidate directive
- If the scanning pointer passes over the end of the directive,
- then the directive is inside a comment */
- if (pz_next < text)
+ /* Find the next else or endif in the file. */
+ if (regexec (&label_re, text, 2, match, 0) != 0)
+ break;
+ pz_next = text + match[0].rm_eo;
+
+ /* Scan from where we are up to that position, to make sure
+ we didn't find something in a string or comment. */
+ while (pz_next > text)
{
- if (regexec (&label_re, text, 2, match, 0) != 0)
- break;
- pz_next = text + match[0].rm_eo;
- }
-
- /*
- IF the scan pointer has not reached the directive end, ... */
- if (pz_next > text)
- {
/*
Advance the scanning pointer. If we are at the start
of a quoted string or a comment, then skip the entire unit */
@@ -209,34 +203,30 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
case '\'':
text = skip_quote( ch, text );
break;
- } /* switch (ch) */
- continue;
- } /* if (still shy of directive end) */
+ }
+ }
+ if (pz_next < text)
+ continue;
- /*
- The scanning pointer (text) has reached the end of the current
- directive under test, then check for bogons here */
- for (;;) /* bogon check */
+ /* We're at the end of a real directive. Check for bogons here. */
+ for (;;)
{
char ch = *(pz_next++);
- if (isspace (ch))
- {
- if (ch == '\n')
- {
- /*
- It is clean. No bogons on this directive */
- text = pz_next;
- pz_next = (char*)NULL; /* force a new regex search */
- break;
- }
- continue;
- }
-
- switch (ch)
- {
+ switch (ch)
+ {
+ case '\n':
+ /* It is clean. No bogons on this directive. */
+ goto next_directive;
+
+ case '\t':
+ case '\v':
+ case '\f':
+ case ' ':
+ /* Whitespace is allowed. */
+ break;
+
case '\\':
- /*
- Skip escaped newlines. Otherwise, we have a bogon */
+ /* Skip escaped newlines. Otherwise, we have a bogon. */
if (*pz_next != '\n')
return APPLY_FIX;
@@ -244,42 +234,41 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
break;
case '/':
- /*
- Skip comments. Otherwise, we have a bogon */
- switch (*pz_next)
- {
- case '/':
- /* IF we found a "//" in a C header, THEN fix it. */
- if (! file_is_cxx)
+ /* Skip comments. Otherwise, we have a bogon */
+ switch (*pz_next)
+ {
+ case '/':
+ /* // in a C header is a bogon. */
+ if (! is_cxx_header( fname, all_text ))
return APPLY_FIX;
- /* C++ header. Skip to newline and continue. */
+ /* C++ comment is allowed in a C++ header.
+ Skip to newline and continue. */
pz_next = strchr( pz_next+1, '\n' );
if (pz_next == (char*)NULL)
return SKIP_FIX;
pz_next++;
- break;
+ goto next_directive;
- case '*':
+ case '*':
/* A comment for either C++ or C. Skip over it. */
pz_next = strstr( pz_next+1, "*/" );
if (pz_next == (char*)NULL)
return SKIP_FIX;
pz_next += 2;
- break;
+ break;
- default:
- /* a '/' followed by other junk. */
- return APPLY_FIX;
- }
- break; /* a C or C++ comment */
+ default:
+ return APPLY_FIX;
+ }
+ break;
default:
- /*
- GOTTA BE A BOGON */
return APPLY_FIX;
} /* switch (ch) */
} /* for (bogon check loop) */
+ next_directive:;
+ text = pz_next;
} /* for (entire file) loop */
return SKIP_FIX;
===================================================================
Index: inclhack.def
--- inclhack.def 2000/01/17 21:45:29 1.47
+++ inclhack.def 2000/01/18 21:37:24
@@ -631,6 +631,9 @@ fix = {
files = term.h;
files = tinfo.h;
+ select = "char[ \t]+bool|bool[ \t]+char";
+ bypass = "we must use the C\\+\\+ compiler's type";
+
sed = "/^#[ \t]*define[ \t][ \t]*bool[ \t][ \t]*char[ \t]*$/i\\\n"
"#ifndef __cplusplus\n";
@@ -749,21 +752,6 @@ fix = {
/*
- * Note that BSD43_* are used on recent MIPS systems.
- */
-fix = {
- hackname = bsd43_io_macros;
- select = "BSD43__IO";
- /*
- * Put single quotes aroung the character that appears after '('
- * and before ',', UNLESS it is a 'c' or 'g' or 'x'.
- */
- sed = "/[ \t]BSD43__IO[A-Z]*[ \t]*(/" 's/(\(.\),/(\'\1\',/';
- sed = "/#[ \t]*define[ \t]*[ \t]BSD43__IO/" 's/\'\([cgx]\)\'/\1/g';
-};
-
-
-/*
* Fix <c_asm.h> on Digital UNIX V4.0:
* It contains a prototype for a DEC C internal asm() function,
* clashing with gcc's asm keyword. So protect this with __DECC.
@@ -970,52 +958,45 @@ fix = {
};
/*
- * Fix various _IO* defines, but do *not* quote the characters cgxtf.
+ * Fix various macros used to define ioctl numbers. The traditional
+ * syntax was
+ * #define _IO(n, x) (('n'<<8)+x)
+ * #define TIOCFOO _IO(T, 1)
+ * but this does not work with the C standard, which disallows macro
+ * expansion inside strings. We have to rewrite it thus:
+ * #define _IO(n, x) ((n<<8)+x)
+ * #define TIOCFOO _IO('T', 1)
+ * The select expressions match too much, but the c_fix code is cautious.
+ *
+ * _IO might be: _IO DESIO BSD43__IO with W, R, WR, C, ... suffixes.
*/
fix = {
- hackname = io_def_quotes;
- select = "[ \t]*[ \t](_|DES)IO[A-Z]*[ \t]*\\( *[^,']";
- sed = "s/\\([ \t]*[ \t]_IO[A-Z]*[ \t]*(\\)\\([^,']\\),/\\1'\\2',/";
- sed = "s/\\([ \t]*[ \t]DESIO[A-Z]*[ \t]*(\\)\\([^,']\\),/\\1'\\2',/";
- sed = "/#[ \t]*define[ \t]*[ \t]_IO/" "s/'\\([cgxtf]\\)'/\\1/g";
- sed = "/#[ \t]*define[ \t]*[ \t]DESIOC/" 's/\'\([cdgx]\)\'/\1/g';
+ hackname = io_use_quotes;
+ select = "define[ \t]+[A-Z0-9_]+[ \t]+[A-Z0-9_]+IO[A-Z]*[ \t]*\\( *[^,']";
+ c_fix = IO_use;
};
-
+fix = {
+ hackname = io_def_quotes;
+ select = "define[ \t]+[A-Z0-9_]+IO[A-Z]*\\(([a-zA-Z]).*'\\1'";
+ c_fix = IO_defn;
+};
+
+
/*
- * Fix CTRL macros
- *
- * Basically, what is supposed to be happening is that every
- * _invocation_ of the "_CTRL()" or "CTRL()" macros is supposed to have
- * its argument inserted into single quotes. We _must_ do this because
- * ANSI macro substitution rules prohibit looking inside quoted strings
- * for the substitution names. A side effect is that the quotes are
- * inserted in the definitions of those macros as well. So, the last
- * several sed expressions are supposed to clean up the definitions, as
- * long as those definitions are using "c", "g" or "x" as the macro
- * argument :). Yuck.
+ * Same deal for CTRL() macros.
+ * CTRL might be: CTRL _CTRL ISCTRL BSD43_CTRL ...
*/
fix = {
- hackname = ioctl_fix_ctrl;
- select = "CTRL[ \t]*\\(";
-
- sed = "/[^A-Z0-9_]CTRL[ \t]*(/"
- "s/\\([^']\\))/'\\1')/";
-
- sed = "/[^A-Z0-9]_CTRL[ \t]*(/"
- "s/\\([^']\\))/'\\1')/";
-
- sed = "/#[ \t]*define[ \t]*[ \t]CTRL/"
- "s/'\\([cgx]\\)'/\\1/g";
-
- sed = "/#[ \t]*define[ \t]*[ \t]_CTRL/"
- "s/'\\([cgx]\\)'/\\1/g";
-
- sed = "/#[ \t]*define[ \t]*[ \t]BSD43_CTRL/"
- "s/'\\([cgx]\\)'/\\1/g";
-
- sed = "/#[ \t]*define[ \t]*[ \t][_]*ISCTRL/"
- "s/'\\([cgx]\\)'/\\1/g";
+ hackname = ctrl_use_quotes;
+ select = "define[ \t]+[A-Z0-9_]+[ \t]+[A-Z0-9_]+CTRL[ \t]*\\( *[^,']";
+ c_fix = CTRL_use;
+};
+
+fix = {
+ hackname = ctrl_def_quotes;
+ select = "define[ \t]+[A-Z0-9_]+CTRL\\(([a-zA-Z]).*'\\1'";
+ c_fix = CTRL_defn;
};
@@ -1292,16 +1273,16 @@ fix = {
* The fixinc_eol stuff is to work around a bug in the sed
*/
select = "^#[ \t]*(if|elif).*"
- "[^a-zA-Z0-9_](_*[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
+ "[^a-zA-Z0-9_](_?[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
exesel = "^#[ \t]*(if|elif).*[^a-zA-Z0-9_]"
"("
"M32"
- "|_*MIPSE[LB]"
- "|_*SYSTYPE_[A-Z0-9]"
- "|_*[Rr][34]000"
- "|_*host_mips"
- "|_*i386"
- "|_*mips"
+ "|_?MIPSE[LB]"
+ "|_?SYSTYPE_[A-Z0-9]"
+ "|_?[Rr][34]000"
+ "|_?host_mips"
+ "|_?i386"
+ "|_?mips"
"|bsd4"
"|is68k"
"|m[68]8k"
@@ -1332,12 +1313,18 @@ fix = {
"\ts/[a-zA-Z0-9_][a-zA-Z0-9_]*/ & /g\n"
"\ts/ M32 / __M32__ /g\n"
- "\ts/ _*MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
- "\ts/ _*SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
- "\ts/ _*\\([Rr][34]\\)000 / __\\1000__ /g\n"
- "\ts/ _*host_mips / __host_mips__ /g\n"
- "\ts/ _*i386 / __i386__ /g\n"
- "\ts/ _*mips / __mips__ /g\n"
+ "\ts/ MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
+ "\ts/ _MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
+ "\ts/ SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
+ "\ts/ _SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
+ "\ts/ \\([Rr][34]\\)000 / __\\1000__ /g\n"
+ "\ts/ _\\([Rr][34]\\)000 / __\\1000__ /g\n"
+ "\ts/ host_mips / __host_mips__ /g\n"
+ "\ts/ _host_mips / __host_mips__ /g\n"
+ "\ts/ i386 / __i386__ /g\n"
+ "\ts/ _i386 / __i386__ /g\n"
+ "\ts/ mips / __mips /g\n"
+ "\ts/ _mips / __mips /g\n"
"\ts/ bsd4\\([0-9]\\) / __bsd4\\1__ /g\n"
"\ts/ is68k / __is68k__ /g\n"
"\ts/ m68k / __m68k__ /g\n"
@@ -1372,7 +1359,7 @@ fix = {
hackname = math_exception;
files = math.h;
select = "struct exception";
- bypass = "We have a problem when using C++";
+ bypass = "We have a problem when using C\\+\\+";
sed = "/struct exception/i\\\n"
"#ifdef __cplusplus\\\n"
"#define exception __math_exception\\\n"