This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
fixincludes: replacement for machine_name fix
- To: gcc-patches at gcc dot gnu dot org
- Subject: fixincludes: replacement for machine_name fix
- From: Zack Weinberg <zack at wolery dot cumb dot org>
- Date: Wed, 19 Jan 2000 14:07:49 -0800
- Cc: bkorb at sco dot com
This patch replaces the machine_name fix with one that modifies only
ifdefs that depend on a predefined identifier that this particular
target might actually define. In other words, on i686-linux the only
predefines outside the reserved name space are 'i386', 'unix', and
'linux', so only #ifdefs referring to those will be modified.
Furthermore, if the line reads something like
#if defined linux || defined __linux__
then it will not be modified. (Yes, that's a real-world example.)
This cuts in half the number of headers modified on my system.
The patch also changes the behavior of the %P spec to be somewhat more
predictable. %P transforms '-Dlinux' into '-D__linux__' and
'-D__linux'. It used to turn '-D_mips' into '-D___mips__' and
'-D___mips' - note the triple underscore. Now it will produce
'__mips__' and '__mips' just as '-Dmips' would have. That change
needs separate review, and if it is rejected, then the fixincludes
patch needs to be tweaked not to expect the new behavior.
I would appreciate testing of this patch and of the previous (now
committed) patch on systems where fixincludes is expected to do real
work.
Bruce: this patch removes the only use of exesel, and therefore will
allow us to throw that away.
zw
* Makefile.in (fixinc.sh): Depend on specs.
* fixinc/Makefile.in: Add rule to create machname.h.
(fixlib.o): Depend on machname.h.
* fixinc/fixtests.c (machine_name): New test.
* fixinc/fixfixes.c (machine_name): New fix.
* fixinc/fixlib.c (mn_get_regexps): New helper function for
the machine_name test and fix.
* fixinc/fixlib.h: Prototype it.
* fixinc/inclhack.def (machine_name): Use the C test and fix.
* gcc.c (do_spec_1) [case P]: Take care not to create
identifiers with three leading or trailing underscores.
===================================================================
Index: Makefile.in
--- Makefile.in 2000/01/19 09:42:10 1.367
+++ Makefile.in 2000/01/19 21:51:53
@@ -2161,7 +2161,7 @@ FIXINCSRCDIR=$(srcdir)/fixinc
fixinc.sh: $(FIXINCSRCDIR)/mkfixinc.sh $(FIXINCSRCDIR)/fixincl.c \
$(FIXINCSRCDIR)/procopen.c $(FIXINCSRCDIR)/gnu-regex.c \
$(FIXINCSRCDIR)/server.c $(FIXINCSRCDIR)/gnu-regex.h \
- $(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def
+ $(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def specs
MAKE="$(MAKE)"; srcdir=`cd $(srcdir)/fixinc; pwd` ; \
export MAKE srcdir ; \
cd ./fixinc; $(SHELL) $${srcdir}/mkfixinc.sh $(target)
===================================================================
Index: gcc.c
--- gcc.c 2000/01/14 17:14:43 1.127
+++ gcc.c 2000/01/19 21:51:54
@@ -4263,7 +4263,12 @@ do_spec_1 (spec, inswitch, soft_matched_
char *y;
/* Copy all of CPP_PREDEFINES into BUF,
- but put __ after every -D and at the end of each arg. */
+ but force them all into the reserved name space if they aren't already there. The reserved name space is all
+ identifiers beginning with two underscores or with one
+ underscore and a capital letter. We do the forcing by
+ adding up to two underscores to the beginning and end
+ of each symbol. e.g. mips, _mips, mips_, and _mips_ all
+ become __mips__. */
y = cpp_predefines;
while (*y != 0)
{
@@ -4279,8 +4284,9 @@ do_spec_1 (spec, inswitch, soft_matched_
&& ! ISUPPER ((unsigned char)*(y+1))))
{
/* Stick __ at front of macro name. */
+ if (*y != '_')
+ *x++ = '_';
*x++ = '_';
- *x++ = '_';
/* Arrange to stick __ at the end as well. */
flag = 1;
}
@@ -4291,8 +4297,12 @@ do_spec_1 (spec, inswitch, soft_matched_
if (flag)
{
- *x++ = '_';
- *x++ = '_';
+ if (x[-1] != '_')
+ {
+ if (x[-2] != '_')
+ *x++ = '_';
+ *x++ = '_';
+ }
}
/* Copy the value given, if any. */
@@ -4324,7 +4334,8 @@ do_spec_1 (spec, inswitch, soft_matched_
/* Stick -D__ at front of macro name. */
*x++ = '-';
*x++ = 'D';
- *x++ = '_';
+ if (*y != '_')
+ *x++ = '_';
*x++ = '_';
/* Copy the macro name. */
===================================================================
Index: fixinc/Makefile.in
--- fixinc/Makefile.in 2000/01/19 21:41:04 1.12
+++ fixinc/Makefile.in 2000/01/19 21:51:54
@@ -97,6 +97,7 @@ gnu-regex.o: gnu-regex.c
fixincl.o : fixincl.x fixincl.c fixfixes.c fixtests.c
server.o : server.c server.h
procopen.o : procopen.c server.h
+fixlib.o: machname.h
fixincl.x: fixincl.tpl inclhack.def
cd $(srcdir) ; ./genfixes $@
@@ -139,3 +140,16 @@ install: $(TARGETS)
Makefile: Makefile.in ../config.status
cd .. \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# Black magic.
+# Note dependency on ASCII. \040 = space, \011 = tab, \012 = newline.
+
+machname.h: ../specs
+ @tr -s '\040\011' '\012\012' < ../specs | \
+ sed -n 's/^.*-D\([a-zA-Z_][a-zA-Z0-9_]*\).*$$/\1/p' | sort -u | \
+ grep -v '^_[_A-Z]' > mn.T
+ @echo "Forbidden identifiers: `tr '\012' ' ' <mn.T`"
+ @sed 's/^/\\\\</; s/$$/\\\\>/' <mn.T | tr '\012' '|' | \
+ sed 's/^/#define MN_NAME_PAT "/; s/|$$/"/' > machname.h
+ @echo >> machname.h
+ @-rm -f mn.T
===================================================================
Index: fixinc/fixfixes.c
--- fixinc/fixfixes.c 2000/01/19 21:41:04 1.7
+++ fixinc/fixfixes.c 2000/01/19 21:51:55
@@ -77,7 +77,8 @@ typedef struct {
_FT_( "IO_use", IO_use_fix ) \
_FT_( "CTRL_use", CTRL_use_fix) \
_FT_( "IO_defn", IO_defn_fix ) \
- _FT_( "CTRL_defn", CTRL_defn_fix )
+ _FT_( "CTRL_defn", CTRL_defn_fix ) \
+ _FT_( "machine_name", machine_name_fix )
#define FIX_PROC_HEAD( fix ) \
@@ -545,6 +546,100 @@ FIX_PROC_HEAD( IO_defn_fix )
FIX_PROC_HEAD( CTRL_defn_fix )
{
fix_char_macro_defines (text, "CTRL");
+}
+
+
+/* Fix for machine name #ifdefs that are not in the namespace reserved
+ by the C standard. They won't be defined if compiling with -ansi,
+ and the headers will break. We go to some trouble to only change
+ #ifdefs where the macro is defined by GCC in non-ansi mode; this
+ minimizes the number of headers touched. */
+
+FIX_PROC_HEAD( machine_name_fix )
+{
+ regmatch_t match[2];
+ char *line, *base, *limit, *p, *q;
+ regex_t *label_re, *name_re;
+ char *scratch;
+ size_t len;
+
+ len = mn_get_regexps (&label_re, &name_re, "machine_name_fix");
+ scratch = xmalloc (len);
+
+ for (base = text;
+ regexec (label_re, base, 2, match, 0) == 0;
+ base = limit)
+ {
+ base += match[0].rm_eo;
+ /* We're looking at an #if or #ifdef. Scan forward for the
+ next non-escaped newline. */
+ line = limit = base;
+ do
+ {
+ limit++;
+ limit = strchr (limit, '\n');
+ if (!limit)
+ goto done;
+ }
+ while (limit[-1] == '\\');
+
+ /* If the 'name_pat' matches in between base and limit, we have
+ a bogon. It is not worth the hassle of excluding comments
+ because comments on #if/#ifdef lines are rare, and strings on
+ such lines are illegal.
+
+ REG_NOTBOL means 'base' is not at the beginning of a line, which
+ shouldn't matter since the name_re has no ^ anchor, but let's
+ be accurate anyway. */
+
+ for (;;)
+ {
+ again:
+ if (base == limit)
+ break;
+
+ if (regexec (name_re, base, 1, match, REG_NOTBOL))
+ goto done; /* No remaining match in this file */
+
+ /* Match; is it on the line? */
+ if (match[0].rm_eo > limit - base)
+ break;
+
+ p = base + match[0].rm_so;
+ base += match[0].rm_eo;
+
+ /* One more test: if on the same line we have the same string
+ with the appropriate underscores, then leave it alone.
+ We want exactly two leading and trailing underscores. */
+ scratch[0] = '_';
+ scratch[1] = '_';
+ if (*p == '_')
+ {
+ len = base - p - ((*base == '_') ? 2 : 1);
+ memcpy (&scratch[2], p + 1, len);
+ }
+ else
+ {
+ len = base - p - ((*base == '_') ? 1 : 0);
+ memcpy (&scratch[2], p, len);
+ }
+ len += 2;
+ scratch[len++] = '_';
+ scratch[len++] = '_';
+
+ for (q = line; q <= limit - len; q++)
+ if (*q == '_' && !strncmp (q, scratch, len))
+ goto again;
+
+ fwrite (text, 1, p - text, stdout);
+ fwrite (scratch, 1, len, stdout);
+
+ text = base;
+ }
+ }
+ done:
+ fputs (text, stdout);
+ free (scratch);
}
===================================================================
Index: fixinc/fixlib.c
--- fixinc/fixlib.c 2000/01/19 21:41:04 1.6
+++ fixinc/fixlib.c 2000/01/19 21:51:55
@@ -171,3 +171,35 @@ compile_re( pat, re, match, e1, e2 )
exit (EXIT_FAILURE);
}
}
+
+/* * * * * * * * * * * * *
+
+ Helper routine and data for the machine_name test and fix.
+ machname.h is created by black magic in the Makefile. */
+
+#include "machname.h"
+
+tSCC mn_label_pat[] = "^[ \t]*#[ \t]*if(def)?[ \t]+";
+static regex_t mn_label_re;
+
+tSCC mn_name_pat[] = MN_NAME_PAT;
+static regex_t mn_name_re;
+
+static int mn_compiled = 0;
+
+size_t
+mn_get_regexps( label_re, name_re, who )
+ regex_t **label_re;
+ regex_t **name_re;
+ tCC *who;
+{
+ if (! mn_compiled)
+ {
+ compile_re (mn_label_pat, &mn_label_re, 1, "label pattern", who);
+ compile_re (mn_name_pat, &mn_name_re, 1, "name pattern", who);
+ mn_compiled++;
+ }
+ *label_re = &mn_label_re;
+ *name_re = &mn_name_re;
+ return sizeof mn_name_pat;
+}
===================================================================
Index: fixinc/fixlib.h
--- fixinc/fixlib.h 2000/01/17 21:45:29 1.5
+++ fixinc/fixlib.h 2000/01/19 21:51:55
@@ -99,4 +99,6 @@ char * load_file_data _P_(( FILE* fp ));
t_bool is_cxx_header _P_(( tCC* filename, tCC* filetext ));
void compile_re _P_(( tCC* pat, regex_t* re, int match,
tCC *e1, tCC *e2 ));
+size_t mn_get_regexps _P_(( regex_t** label_re, regex_t** name_re,
+ tCC *who ));
#endif /* FIXINCLUDES_FIXLIB_H */
===================================================================
Index: fixinc/fixtests.c
--- fixinc/fixtests.c 2000/01/19 21:41:04 1.9
+++ fixinc/fixtests.c 2000/01/19 21:51:55
@@ -58,7 +58,8 @@ typedef struct {
#define FIX_TEST_TABLE \
_FT_( "double_slash", double_slash_test ) \
- _FT_( "else_endif_label", else_endif_label_test )
+ _FT_( "else_endif_label", else_endif_label_test ) \
+ _FT_( "machine_name", machine_name_test )
#define TEST_FOR_FIX_PROC_HEAD( test ) \
@@ -265,6 +266,59 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
text = pz_next;
} /* for (entire file) loop */
+ return SKIP_FIX;
+}
+
+/* Test for the machine_name fix. Return APPLY_FIX when an #if or
+ #ifdef line of TEXT (including backslash-continued lines) contains
+ a match for the name regexp supplied by mn_get_regexps, and
+ SKIP_FIX otherwise. The on-the-line bound here must agree with
+ the one used by machine_name_fix. */
+TEST_FOR_FIX_PROC_HEAD( machine_name_test )
+{
+ regex_t *label_re, *name_re;
+ regmatch_t match[2];
+ tCC *base, *limit;
+
+ mn_get_regexps(&label_re, &name_re, "machine_name_test");
+
+ for (base = text;
+ regexec (label_re, base, 2, match, 0) == 0;
+ base = limit)
+ {
+ base += match[0].rm_eo;
+ /* We're looking at an #if or #ifdef. Scan forward for the
+ next non-escaped newline. */
+ limit = base;
+ do
+ {
+ limit++;
+ limit = strchr (limit, '\n');
+ if (!limit)
+ return SKIP_FIX;
+ }
+ while (limit[-1] == '\\');
+
+ /* If the 'name_pat' matches in between base and limit, we have
+ a bogon. It is not worth the hassle of excluding comments
+ because comments on #if/#ifdef lines are rare, and strings on
+ such lines are illegal.
+
+ REG_NOTBOL means 'base' is not at the beginning of a line, which
+ shouldn't matter since the name_re has no ^ anchor, but let's
+ be accurate anyway. */
+
+ if (regexec (name_re, base, 1, match, REG_NOTBOL))
+ return SKIP_FIX; /* No match in file - no fix needed */
+
+ /* Match; is it on the line? A match that ends exactly at the
+ newline (as in "#ifdef unix") is on the line, so the bound is
+ inclusive. */
+ if (match[0].rm_eo <= limit - base)
+ return APPLY_FIX; /* Yup */
+
+ /* Otherwise, keep looking... */
+ }
return SKIP_FIX;
}
===================================================================
Index: fixinc/inclhack.def
--- fixinc/inclhack.def 2000/01/19 21:41:04 1.48
+++ fixinc/inclhack.def 2000/01/19 21:51:55
@@ -1253,93 +1253,11 @@ fix = {
/*
* Fix non-ansi machine name defines
- * File selection is split into two parts: the shell version as
- * a single patch, and the program version with each patch separate.
- * Each is substantially faster for the particular environment.
- * You have a dual maintenance problem here.
*/
fix = {
hackname = machine_name;
- /*
- * Select '#if.*' and '#elif" with possible non-ansi symbols
- * The only non-ansi symbols we know about start with one of:
- * MRS_bhimnprstuv
- * If any are added to the substitution list, then add it to
- * the selection list as well. Hopefully we can avoid names
- * starting with "d" and "l", because this pattern would then
- * match "defined" and "lint" as well. I suppose we could add
- * a "bypass = lint" if we had to though.
- *
- * The fixinc_eol stuff is to work around a bug in the sed
- */
- select = "^#[ \t]*(if|elif).*"
- "[^a-zA-Z0-9_](_*[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
- exesel = "^#[ \t]*(if|elif).*[^a-zA-Z0-9_]"
- "("
- "M32"
- "|_*MIPSE[LB]"
- "|_*SYSTYPE_[A-Z0-9]"
- "|_*[Rr][34]000"
- "|_*host_mips"
- "|_*i386"
- "|_*mips"
- "|bsd4"
- "|is68k"
- "|m[68]8k"
- "|mc680"
- "|news"
- "|ns32000"
- "|pdp11"
- "|pyr"
- "|sel"
- "|sony_news"
- "|sparc"
- "|sun"
- "|tahoe"
- "|tower"
- "|u370"
- "|u3b"
- "|unix"
- "|vax"
- ")";
-
- sed = ":loop\n"
- '/\\\\$/' "N\n"
- 's/\\\\$/\\\\+++fixinc_eol+++/' "\n"
- '/\\\\$/' "b loop\n"
- 's/\\\\+++fixinc_eol+++/\\\\/g' "\n"
-
- "/#[\t ]*[el]*if/ {\n"
- "\ts/[a-zA-Z0-9_][a-zA-Z0-9_]*/ & /g\n"
-
- "\ts/ M32 / __M32__ /g\n"
- "\ts/ _*MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
- "\ts/ _*SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
- "\ts/ _*\\([Rr][34]\\)000 / __\\1000__ /g\n"
- "\ts/ _*host_mips / __host_mips__ /g\n"
- "\ts/ _*i386 / __i386__ /g\n"
- "\ts/ _*mips / __mips__ /g\n"
- "\ts/ bsd4\\([0-9]\\) / __bsd4\\1__ /g\n"
- "\ts/ is68k / __is68k__ /g\n"
- "\ts/ m68k / __m68k__ /g\n"
- "\ts/ m88k / __m88k__ /g\n"
- "\ts/ mc680\\([0-9]\\)0 / __mc680\\10__ /g\n"
- "\ts/ news\\([0-9]*\\) / __news\\1__ /g\n"
- "\ts/ ns32000 / __ns32000__ /g\n"
- "\ts/ pdp11 / __pdp11__ /g\n"
- "\ts/ pyr / __pyr__ /g\n"
- "\ts/ sel / __sel__ /g\n"
- "\ts/ sony_news / __sony_news__ /g\n"
- "\ts/ sparc / __sparc__ /g\n"
- "\ts/ sun\\([a-z0-9]*\\) / __sun\\1__ /g\n"
- "\ts/ tahoe / __tahoe__ /g\n"
- "\ts/ tower\\([_0-9]*\\) / __tower\\1__ /g\n"
- "\ts/ u370 / __u370__ /g\n"
- "\ts/ u3b\\([0-9]*\\) / __u3b\\1__ /g\n"
- "\ts/ unix / __unix__ /g\n"
- "\ts/ vax / __vax__ /g\n"
-
- "\ts/ \\([a-zA-Z0-9_][a-zA-Z0-9_]*\\) /\\1/g\n\t}";
+ c_test = machine_name;
+ c_fix = machine_name;
};