This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

fixincludes: replacement for machine_name fix


This patch replaces the machine_name fix with one that modifies only
ifdefs that depend on a predefined identifier that this particular
target might actually define.  In other words, on i686-linux the only
predefines outside the reserved name space are 'i386', 'unix', and
'linux', so only #ifdefs referring to those will be modified.
Furthermore, if the line reads something like

#if defined linux || defined __linux__

then it will not be modified.  (Yes, that's a real-world example.)
This cuts in half the number of headers modified on my system.

The patch also changes the behavior of the %P spec to be somewhat more
predictable.  %P transforms '-Dlinux' into '-D__linux__' and
'-D__linux'.  It used to turn '-D_mips' into '-D___mips__' and
'-D___mips' - note the triple underscore.  Now it will produce
'__mips__' and '__mips' just as '-Dmips' would have.  That change
needs separate review, and if it is rejected, then the fixincludes
patch needs to be tweaked not to expect the new behavior.

I would appreciate testing of this patch and of the previous (now
committed) patch on systems where fixincludes is expected to do real
work.

Bruce: this patch removes the only use of exesel, and therefore will
allow us to throw that away.

zw

	* Makefile.in (fixinc.sh): Depend on specs.
	* fixinc/Makefile.in: Add rule to create machname.h.
	(fixlib.o): Depend on machname.h.
	* fixinc/fixtests.c (machine_name): New test.
	* fixinc/fixfixes.c (machine_name): New fix.
	* fixinc/fixlib.c (mn_get_regexps): New helper function for
	the machine_name test and fix.
	* fixinc/fixlib.h: Prototype it.
	* fixinc/inclhack.def (machine_name): Use the C test and fix.
	
	* gcc.c (do_spec_1) [case P]: Take care not to create
	identifiers with three leading or trailing underscores.
	

===================================================================
Index: Makefile.in
--- Makefile.in	2000/01/19 09:42:10	1.367
+++ Makefile.in	2000/01/19 21:51:53
@@ -2161,7 +2161,7 @@ FIXINCSRCDIR=$(srcdir)/fixinc
 fixinc.sh: $(FIXINCSRCDIR)/mkfixinc.sh $(FIXINCSRCDIR)/fixincl.c \
 	$(FIXINCSRCDIR)/procopen.c $(FIXINCSRCDIR)/gnu-regex.c \
 	$(FIXINCSRCDIR)/server.c $(FIXINCSRCDIR)/gnu-regex.h \
-	$(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def
+	$(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def specs
 	MAKE="$(MAKE)"; srcdir=`cd $(srcdir)/fixinc; pwd` ; \
 	export MAKE srcdir ; \
 	cd ./fixinc; $(SHELL) $${srcdir}/mkfixinc.sh $(target)
===================================================================
Index: gcc.c
--- gcc.c	2000/01/14 17:14:43	1.127
+++ gcc.c	2000/01/19 21:51:54
@@ -4263,7 +4263,12 @@ do_spec_1 (spec, inswitch, soft_matched_
 	      char *y;
 
 	      /* Copy all of CPP_PREDEFINES into BUF,
-		 but put __ after every -D and at the end of each arg.  */
+		 but force them all into the reserved name space if they			 aren't already there.  The reserved name space is all
+		 identifiers beginning with two underscores or with one
+		 underscore and a capital letter.  We do the forcing by
+		 adding up to two underscores to the beginning and end
+		 of each symbol. e.g. mips, _mips, mips_, and _mips_ all
+		 become __mips__.  */
 	      y = cpp_predefines;
 	      while (*y != 0)
 		{
@@ -4279,8 +4284,9 @@ do_spec_1 (spec, inswitch, soft_matched_
 			      && ! ISUPPER ((unsigned char)*(y+1))))
 		        {
 			  /* Stick __ at front of macro name.  */
+			  if (*y != '_')
+			    *x++ = '_';
 			  *x++ = '_';
-			  *x++ = '_';
 			  /* Arrange to stick __ at the end as well.  */
 			  flag = 1;
 			}
@@ -4291,8 +4297,12 @@ do_spec_1 (spec, inswitch, soft_matched_
 
 		      if (flag)
 		        {
-			  *x++ = '_';
-			  *x++ = '_';
+			  if (x[-1] != '_')
+			    {
+			      if (x[-2] != '_')
+				*x++ = '_';
+			      *x++ = '_';
+			    }
 			}
 
 		      /* Copy the value given, if any.  */
@@ -4324,7 +4334,8 @@ do_spec_1 (spec, inswitch, soft_matched_
 			  /* Stick -D__ at front of macro name.  */
 			  *x++ = '-';
 			  *x++ = 'D';
-			  *x++ = '_';
+			  if (*y != '_')
+			    *x++ = '_';
 			  *x++ = '_';
 
 			  /* Copy the macro name.  */
===================================================================
Index: fixinc/Makefile.in
--- fixinc/Makefile.in	2000/01/19 21:41:04	1.12
+++ fixinc/Makefile.in	2000/01/19 21:51:54
@@ -97,6 +97,7 @@ gnu-regex.o: gnu-regex.c
 fixincl.o : fixincl.x fixincl.c fixfixes.c fixtests.c
 server.o : server.c server.h
 procopen.o : procopen.c server.h
+fixlib.o: machname.h
 
 fixincl.x: fixincl.tpl inclhack.def
 	cd $(srcdir) ; ./genfixes $@
@@ -139,3 +140,16 @@ install: $(TARGETS)
 Makefile: Makefile.in ../config.status
 	cd .. \
 	  && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# Black magic.
+# Note dependency on ASCII. \040 = space, \011 = tab, \012 = newline.
+
+machname.h: ../specs
+	@tr -s '\040\011' '\012\012' < ../specs | \
+	    sed -n 's/^.*-D\([a-zA-Z_][a-zA-Z0-9_]*\).*$$/\1/p' | sort -u | \
+	    grep -v '^_[_A-Z]' > mn.T
+	@echo "Forbidden identifiers: `tr '\012' ' ' <mn.T`"
+	@sed 's/^/\\\\</; s/$$/\\\\>/' <mn.T | tr '\012' '|' | \
+	     sed 's/^/#define MN_NAME_PAT "/; s/|$$/"/' > machname.h
+	@echo >> machname.h
+	@-rm -f mn.T
===================================================================
Index: fixinc/fixfixes.c
--- fixinc/fixfixes.c	2000/01/19 21:41:04	1.7
+++ fixinc/fixfixes.c	2000/01/19 21:51:55
@@ -77,7 +77,8 @@ typedef struct {
   _FT_( "IO_use",	    IO_use_fix ) \
   _FT_( "CTRL_use",	    CTRL_use_fix) \
   _FT_( "IO_defn",	    IO_defn_fix ) \
-  _FT_( "CTRL_defn",	    CTRL_defn_fix )
+  _FT_( "CTRL_defn",	    CTRL_defn_fix ) \
+  _FT_( "machine_name",	    machine_name_fix )
 
 
 #define FIX_PROC_HEAD( fix ) \
@@ -545,6 +546,100 @@ FIX_PROC_HEAD( IO_defn_fix )
 FIX_PROC_HEAD( CTRL_defn_fix )
 {
   fix_char_macro_defines (text, "CTRL");
+}
+
+
+/* Fix for machine name #ifdefs that are not in the namespace reserved
+   by the C standard.  They won't be defined if compiling with -ansi,
+   and the headers will break.  We go to some trouble to only change
+   #ifdefs where the macro is defined by GCC in non-ansi mode; this
+   minimizes the number of headers touched.  */
+
+FIX_PROC_HEAD( machine_name_fix )
+{
+  regmatch_t match[2];
+  char *line, *base, *limit, *p, *q;
+  regex_t *label_re, *name_re;
+  char *scratch;
+  size_t len;
+
+  len = mn_get_regexps (&label_re, &name_re, "machine_name_fix");
+  scratch = xmalloc (len);
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if or #ifdef.  Scan forward for the
+	 next non-escaped newline.  */
+      line = limit = base;
+      do
+	{
+	  limit++;
+	  limit = strchr (limit, '\n');
+	  if (!limit)
+	    goto done;
+	}
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+	 a bogon.  It is not worth the hassle of excluding comments
+	 because comments on #if/#ifdef lines are rare, and strings on
+	 such lines are illegal.
+
+	 REG_NOTBOL means 'base' is not at the beginning of a line, which
+	 shouldn't matter since the name_re has no ^ anchor, but let's
+	 be accurate anyway.  */
+
+      for (;;)
+	{
+	again:
+	  if (base == limit)
+	    break;
+
+	  if (regexec (name_re, base, 1, match, REG_NOTBOL))
+	    goto done;  /* No remaining match in this file */
+
+	  /* Match; is it on the line?  */
+	  if (match[0].rm_eo > limit - base)
+	    break;
+
+	  p = base + match[0].rm_so;
+	  base += match[0].rm_eo;
+
+	  /* One more test: if on the same line we have the same string
+	     with the appropriate underscores, then leave it alone.
+	     We want exactly two leading and trailing underscores.  */
+	  scratch[0] = '_';
+	  scratch[1] = '_';
+	  if (*p == '_')
+	    {
+	      len = base - p - ((*base == '_') ? 2 : 1);
+	      memcpy (&scratch[2], p + 1, len);
+	    }
+	  else
+	    {
+	      len = base - p - ((*base == '_') ? 1 : 0);
+	      memcpy (&scratch[2], p, len);
+	    }
+	  len += 2;
+	  scratch[len++] = '_';
+	  scratch[len++] = '_';
+
+	  for (q = line; q <= limit - len; q++)
+	    if (*q == '_' && !strncmp (q, scratch, len))
+	      goto again;
+	  
+	  fwrite (text, 1, p - text, stdout);
+	  fwrite (scratch, 1, len, stdout);
+
+	  text = base;
+	}
+    }
+ done:
+  fputs (text, stdout);
+  free (scratch);
 }
 
 
===================================================================
Index: fixinc/fixlib.c
--- fixinc/fixlib.c	2000/01/19 21:41:04	1.6
+++ fixinc/fixlib.c	2000/01/19 21:51:55
@@ -171,3 +171,35 @@ compile_re( pat, re, match, e1, e2 )
       exit (EXIT_FAILURE);
     }
 }
+
+/* * * * * * * * * * * * *
+
+   Helper routine and data for the machine_name test and fix.
+   machname.h is created by black magic in the Makefile.  */
+
+#include "machname.h"
+
+tSCC mn_label_pat[] = "^[ \t]*#[ \t]*if(def)?[ \t]+";
+static regex_t mn_label_re;
+
+tSCC mn_name_pat[] = MN_NAME_PAT;
+static regex_t mn_name_re;
+
+static int mn_compiled = 0;
+
+size_t
+mn_get_regexps( label_re, name_re, who )
+     regex_t **label_re;
+     regex_t **name_re;
+     tCC *who;
+{
+  if (! mn_compiled)
+    {
+      compile_re (mn_label_pat, &mn_label_re, 1, "label pattern", who);
+      compile_re (mn_name_pat, &mn_name_re, 1, "name pattern", who);
+      mn_compiled++;
+    }
+  *label_re = &mn_label_re;
+  *name_re = &mn_name_re;
+  return sizeof mn_name_pat;
+}
===================================================================
Index: fixinc/fixlib.h
--- fixinc/fixlib.h	2000/01/17 21:45:29	1.5
+++ fixinc/fixlib.h	2000/01/19 21:51:55
@@ -99,4 +99,6 @@ char * load_file_data _P_(( FILE* fp ));
 t_bool is_cxx_header  _P_(( tCC* filename, tCC* filetext ));
 void   compile_re     _P_(( tCC* pat, regex_t* re, int match,
 			    tCC *e1, tCC *e2 ));
+size_t mn_get_regexps _P_(( regex_t** label_re, regex_t** name_re,
+			    tCC *who ));
 #endif /* FIXINCLUDES_FIXLIB_H */
===================================================================
Index: fixinc/fixtests.c
--- fixinc/fixtests.c	2000/01/19 21:41:04	1.9
+++ fixinc/fixtests.c	2000/01/19 21:51:55
@@ -58,7 +58,8 @@ typedef struct {
 
 #define FIX_TEST_TABLE \
   _FT_( "double_slash",     double_slash_test ) \
-  _FT_( "else_endif_label", else_endif_label_test )
+  _FT_( "else_endif_label", else_endif_label_test ) \
+  _FT_( "machine_name",     machine_name_test )
 
 
 #define TEST_FOR_FIX_PROC_HEAD( test ) \
@@ -265,6 +266,52 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
       text = pz_next;
     } /* for (entire file) loop */
 
+  return SKIP_FIX;
+}
+
+TEST_FOR_FIX_PROC_HEAD( machine_name_test )
+{
+  regex_t *label_re, *name_re;
+  regmatch_t match[2];
+  tCC *base, *limit;
+
+  mn_get_regexps(&label_re, &name_re, "machine_name_test");
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if or #ifdef.  Scan forward for the
+	 next non-escaped newline.  */
+      limit = base;
+      do
+	{
+	  limit++;
+	  limit = strchr (limit, '\n');
+	  if (!limit)
+	    return SKIP_FIX;
+	}
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+	 a bogon.  It is not worth the hassle of excluding comments
+	 because comments on #if/#ifdef lines are rare, and strings on
+	 such lines are illegal.
+
+	 REG_NOTBOL means 'base' is not at the beginning of a line, which
+	 shouldn't matter since the name_re has no ^ anchor, but let's
+	 be accurate anyway.  */
+
+      if (regexec (name_re, base, 1, match, REG_NOTBOL))
+	return SKIP_FIX;  /* No match in file - no fix needed */
+
+      /* Match; is it on the line?  */
+      if (match[0].rm_eo < limit - base)
+	return APPLY_FIX;  /* Yup */
+
+      /* Otherwise, keep looking... */
+    }
   return SKIP_FIX;
 }
 
===================================================================
Index: fixinc/inclhack.def
--- fixinc/inclhack.def	2000/01/19 21:41:04	1.48
+++ fixinc/inclhack.def	2000/01/19 21:51:55
@@ -1253,93 +1253,11 @@ fix = {
 
 /*
  *  Fix non-ansi machine name defines
- *  File selection is split into two parts:  the shell version as
- *  a single patch, and the program version with each patch separate.
- *  Each is substantially faster for the particular environment.
- *  You have a dual maintenance problem here.
  */
 fix = {
     hackname = machine_name;
-    /*
-     *  Select '#if.*' and '#elif" with possible non-ansi symbols
-     *  The only non-ansi symbols we know about start with one of:
-     *     MRS_bhimnprstuv
-     *  If any are added to the substitution list, then add it to
-     *  the selection list as well.  Hopefully we can avoid names
-     *  starting with "d" and "l", because this pattern would then
-     *  match "defined" and "lint" as well.  I suppose we could add
-     *  a "bypass = lint" if we had to though.
-     *
-     * The fixinc_eol stuff is to work around a bug in the sed
-     */
-    select = "^#[ \t]*(if|elif).*"
-             "[^a-zA-Z0-9_](_*[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
-    exesel = "^#[ \t]*(if|elif).*[^a-zA-Z0-9_]"
-             "("
-                  "M32"
-                 "|_*MIPSE[LB]"
-                 "|_*SYSTYPE_[A-Z0-9]"
-                 "|_*[Rr][34]000"
-                 "|_*host_mips"
-                 "|_*i386"
-                 "|_*mips"
-                 "|bsd4"
-                 "|is68k"
-                 "|m[68]8k"
-                 "|mc680"
-                 "|news"
-                 "|ns32000"
-                 "|pdp11"
-                 "|pyr"
-                 "|sel"
-                 "|sony_news"
-                 "|sparc"
-                 "|sun"
-                 "|tahoe"
-                 "|tower"
-                 "|u370"
-                 "|u3b"
-                 "|unix"
-                 "|vax"
-             ")";
-
-    sed =      ":loop\n"
-               '/\\\\$/'                       "N\n"
-               's/\\\\$/\\\\+++fixinc_eol+++/' "\n"
-               '/\\\\$/'                       "b loop\n"
-               's/\\\\+++fixinc_eol+++/\\\\/g' "\n"
-
-          "/#[\t ]*[el]*if/ {\n"
-                "\ts/[a-zA-Z0-9_][a-zA-Z0-9_]*/ & /g\n"
-
-                "\ts/ M32 / __M32__ /g\n"
-                "\ts/ _*MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
-                "\ts/ _*SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
-                "\ts/ _*\\([Rr][34]\\)000 / __\\1000__ /g\n"
-                "\ts/ _*host_mips / __host_mips__ /g\n"
-                "\ts/ _*i386 / __i386__ /g\n"
-                "\ts/ _*mips / __mips__ /g\n"
-                "\ts/ bsd4\\([0-9]\\) / __bsd4\\1__ /g\n"
-                "\ts/ is68k / __is68k__ /g\n"
-                "\ts/ m68k / __m68k__ /g\n"
-                "\ts/ m88k / __m88k__ /g\n"
-                "\ts/ mc680\\([0-9]\\)0 / __mc680\\10__ /g\n"
-                "\ts/ news\\([0-9]*\\) / __news\\1__ /g\n"
-                "\ts/ ns32000 / __ns32000__ /g\n"
-                "\ts/ pdp11 / __pdp11__ /g\n"
-                "\ts/ pyr / __pyr__ /g\n"
-                "\ts/ sel / __sel__ /g\n"
-                "\ts/ sony_news / __sony_news__ /g\n"
-                "\ts/ sparc / __sparc__ /g\n"
-                "\ts/ sun\\([a-z0-9]*\\) / __sun\\1__ /g\n"
-                "\ts/ tahoe / __tahoe__ /g\n"
-                "\ts/ tower\\([_0-9]*\\) / __tower\\1__ /g\n"
-                "\ts/ u370 / __u370__ /g\n"
-                "\ts/ u3b\\([0-9]*\\) / __u3b\\1__ /g\n"
-                "\ts/ unix / __unix__ /g\n"
-                "\ts/ vax / __vax__ /g\n"
-
-                "\ts/ \\([a-zA-Z0-9_][a-zA-Z0-9_]*\\) /\\1/g\n\t}";
+    c_test   = machine_name;
+    c_fix    = machine_name;
 };
 
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]