This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

cpp: fix bug with token paste at beginning of argument list


This patch fixes a nasty token paste bug.  If you had something like

#define foo(x) bar ( x )
#define baz(x) foo ( ## x )

baz(bum)

you would expect to get "bar ( bum )" after preprocessing, but what
you actually got was "foo )" and a mysterious error about unterminated
macro invocations.

In the definition of 'baz', the left paren after 'foo' is tagged as
the left-hand side of a token paste.  When we go to check that foo()
occurs in function context, we have to look ahead for the right-hand
side of that token paste, which involves entering the "context" of the
argument.  The code which scans the arguments to 'foo' expects to be
called in the "context" of the left paren.  Thus we get confused.

The bug is intrinsic to the way the macro expander works, and this
patch merely prevents the conditions that trigger it from ever
arising.  At macro definition time, we determine whether there is
anything that can be legitimately pasted after each token on the left
of ##.  If there is not, the token does not get flagged as the left
half of a paste, and (when paste warnings are enabled) you get a
"nothing can be pasted after this token" warning at the definition.
As there is nothing that can be pasted after a left parenthesis, the
bug is avoided.

The patch also implements the consensus semantics of the rest-args
extension, as loudly debated a week or so ago.  The token before ## is
now deleted only when the rest argument is given zero actual arguments,
not merely when it is given one empty argument.  The obnoxious
"deprecated GNU ## extension used" warnings are gone.  Instead, you
will get a -pedantic warning (in C99 mode) for rest args fed zero
arguments.  (Users of this extension should check that it still does
what they want.)

Finally, I forgot to update CPP_LAST_EQ when I made <? and >? work in
#if; this patch corrects it.

Bootstrapped i386-linux.

zw

	* cppmacro.c (CAN_PASTE_AFTER): New macro.
	(count_params): Don't set GNU_REST_ARGS on anything.
	(save_expansion): Set PASTE_LEFT only on tokens for which
	CAN_PASTE_AFTER is true, or which are named operators.

	* cpplex.c (parse_args): Distinguish between a rest argument
	given one empty argument, and a rest argument given zero arguments.
	(maybe_paste_with_next): Look for VOID_REST tag, and trigger
	deletion of previous token based on that.
	(get_raw_token): Flatten some control structure.

	* cpplib.h (CPP_LAST_EQ): Correct.
	(VOID_REST): New token flag.
	(GNU_REST_ARGS): Delete.

	* gcc.dg/cpp/20000625-2.c, gcc.dg/cpp/macsyntx.c: Update error
	regexps. 
	* gcc.dg/cpp/paste6.c: New test.

===================================================================
Index: cpplex.c
--- cpplex.c	2000/07/19 20:18:06	1.80
+++ cpplex.c	2000/07/20 17:15:28
@@ -2399,13 +2399,19 @@ parse_args (pfile, hp, args)
 	 debug("string");
 	 This is exactly the same as if the rest argument had received no
 	 tokens - debug("string",);  This extension is deprecated.  */
-	
-      if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
+
+      if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
 	{
 	  /* Duplicate the placemarker.  Then we can set its flags and
              position and safely be using more than one.  */
-	  save_token (args, duplicate_token (pfile, &placemarker_token));
+	  cpp_token *pm = duplicate_token (pfile, &placemarker_token);
+	  pm->flags = VOID_REST;
+	  save_token (args, pm);
 	  args->ends[argc] = total + 1;
+
+	  if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
+	    cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
+
 	  return 0;
 	}
       else
@@ -2710,17 +2716,11 @@ maybe_paste_with_next (pfile, token)
 	pasted = duplicate_token (pfile, second);
       else if (second->type == CPP_PLACEMARKER)
 	{
-	  cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
 	  /* GCC has special extended semantics for a ## b where b is
-	     a varargs parameter: a disappears if b consists of no
-	     tokens.  This extension is deprecated.  */
-	  if ((mac_context->u.list->flags & GNU_REST_ARGS)
-	      && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
-		  == (unsigned) mac_context->u.list->paramc))
-	    {
-	      cpp_warning (pfile, "deprecated GNU ## extension used");
-	      pasted = duplicate_token (pfile, second);
-	    }
+	     a varargs parameter: a disappears if b was given no actual
+	     arguments (not merely if b is an empty argument).  */
+	  if (second->flags & VOID_REST)
+	    pasted = duplicate_token (pfile, second);
 	  else
 	    pasted = duplicate_token (pfile, token);
 	}
@@ -3161,6 +3161,7 @@ get_raw_token (pfile)
 	{
 	  result = context->pushed_token;
 	  context->pushed_token = 0;
+	  return result;	/* Cannot be a CPP_MACRO_ARG */
 	}
       else if (context->posn == context->count)
 	{
@@ -3168,21 +3169,19 @@ get_raw_token (pfile)
 	    return &eof_token;
 	  continue;
 	}
-      else
+      else if (IS_ARG_CONTEXT (context))
 	{
-	  if (IS_ARG_CONTEXT (context))
+	  result = context->u.arg[context->posn++];
+	  if (result == 0)
 	    {
+	      context->flags ^= CONTEXT_RAW;
 	      result = context->u.arg[context->posn++];
-	      if (result == 0)
-		{
-		  context->flags ^= CONTEXT_RAW;
-		  result = context->u.arg[context->posn++];
-		}
-	      return result;	/* Cannot be a CPP_MACRO_ARG */
 	    }
-	  result = &context->u.list->tokens[context->posn++];
+	  return result;	/* Cannot be a CPP_MACRO_ARG */
 	}
 
+      result = &context->u.list->tokens[context->posn++];
+
       if (result->type != CPP_MACRO_ARG)
 	return result;
 
@@ -3225,7 +3224,6 @@ lex_next (pfile, clear)
       if (pfile->temp_used)
 	release_temp_tokens (pfile);
     }
-     
   lex_line (pfile, list);
   pfile->contexts[0].count = list->tokens_used;
 
===================================================================
Index: cpplib.h
--- cpplib.h	2000/07/19 20:18:06	1.113
+++ cpplib.h	2000/07/20 17:15:29
@@ -46,7 +46,7 @@ typedef struct cpp_hashnode cpp_hashnode
    the same order as their counterparts without the '=', like ">>".  */
 
 /* Positions in the table.  */
-#define CPP_LAST_EQ CPP_LSHIFT
+#define CPP_LAST_EQ CPP_MAX
 #define CPP_FIRST_DIGRAPH CPP_HASH
 
 #define TTYPE_TABLE				\
@@ -154,7 +154,8 @@ struct cpp_string
 #define STRINGIFY_ARG	(1 << 3) /* If macro argument to be stringified.  */
 #define PASTE_LEFT	(1 << 4) /* If on LHS of a ## operator.  */
 #define PASTED		(1 << 5) /* The result of a ## operator.  */
-#define NAMED_OP	(1 << 6) /* C++ named operators, also defined */
+#define NAMED_OP	(1 << 6) /* C++ named operators, also "defined".  */
+#define VOID_REST	(1 << 7) /* When a rest arg gets zero actual args.  */
 
 /* A preprocessing token.  This has been carefully packed and should
    occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */
@@ -178,8 +179,7 @@ struct cpp_token
 /* cpp_toklist flags.  */
 #define LIST_OFFSET     (1 << 0)
 #define VAR_ARGS	(1 << 1)
-#define GNU_REST_ARGS	(1 << 2) /* Set in addition to VAR_ARGS.  */
-#define BEG_OF_FILE	(1 << 3)
+#define BEG_OF_FILE	(1 << 2)
 
 struct directive;		/* These are deliberately incomplete.  */
 struct answer;
===================================================================
Index: cppmacro.c
--- cppmacro.c	2000/07/18 23:25:05	1.2
+++ cppmacro.c	2000/07/20 17:15:30
@@ -53,6 +53,14 @@ static unsigned int find_param PARAMS ((
  					const cpp_token *));
 static cpp_toklist * alloc_macro PARAMS ((cpp_reader *, struct macro_info *));
 
+/* These are all the tokens that can have something pasted after them.
+   Comma is included in the list only to support the GNU varargs extension
+   (where you write a ## b and a disappears if b is given zero arguments).  */
+#define CAN_PASTE_AFTER(type) \
+((type) <= CPP_LAST_EQ || (type) == CPP_COLON || (type) == CPP_HASH \
+ || (type) == CPP_DEREF || (type) == CPP_DOT || (type) == CPP_NAME \
+ || (type) == CPP_INT || (type) == CPP_FLOAT || (type) == CPP_NUMBER \
+ || (type) == CPP_MACRO_ARG || (type) == CPP_PLACEMARKER || (type) == CPP_COMMA)
 
 /* Scans for a given token, returning the parameter number if found,
    or 0 if not found.  Scans from FIRST to TOKEN - 1 or the first
@@ -192,7 +200,6 @@ count_params (pfile, info)
 	    }
 	  else
 	    {
-	      info->flags |= GNU_REST_ARGS;
 	      if (CPP_PEDANTIC (pfile))
 		cpp_pedwarn (pfile,
 			     "ISO C does not permit named varargs parameters");
@@ -294,9 +301,6 @@ parse_define (pfile, info)
 	  /* Constraint 6.10.3.5  */
 	  if (!(info->flags & VAR_ARGS) && is__va_args__ (pfile, token))
 	    return 1;
-	  /* It might be worth doing a check here that we aren't a
-	     macro argument, since we don't store the text of macro
-	     arguments.  This would reduce "len" and save space.  */
 	}
       info->ntokens++;
       if (TOKEN_SPELL (token) == SPELL_STRING)
@@ -463,7 +467,15 @@ save_expansion (pfile, info)
 	  continue;
 
 	case CPP_PASTE:
-	  dest[-1].flags |= PASTE_LEFT;
+	  /* Set the paste flag on the token to our left, unless there
+	     is no possible token to which it might be pasted.  That
+	     is critical for correct operation under some circumstances;
+	     see gcc.dg/cpp/paste6.c. */
+	  if (CAN_PASTE_AFTER (dest[-1].type) || (dest[-1].flags & NAMED_OP))
+	    dest[-1].flags |= PASTE_LEFT;
+	  else if (CPP_OPTION (pfile, warn_paste))
+	    cpp_warning_with_line (pfile, dest[-1].line, dest[-1].col,
+				   "nothing can be pasted after this token");
 	  continue;
 
 	case CPP_HASH:
===================================================================
Index: testsuite/gcc.dg/cpp/20000625-2.c
--- testsuite/gcc.dg/cpp/20000625-2.c	2000/07/04 01:58:21	1.2
+++ testsuite/gcc.dg/cpp/20000625-2.c	2000/07/20 17:46:49
@@ -2,7 +2,7 @@
 /* { dg-do run } */
 
 #define symbol_version(name, version) name##@##version
-
+/* { dg-warning "nothing can be pasted" "" { target *-*-* } 4 } */
 #define str(x) xstr(x)
 #define xstr(x) #x
 
===================================================================
Index: testsuite/gcc.dg/cpp/macsyntx.c
--- testsuite/gcc.dg/cpp/macsyntx.c	2000/07/19 20:18:08	1.3
+++ testsuite/gcc.dg/cpp/macsyntx.c	2000/07/20 17:46:49
@@ -51,16 +51,15 @@ one(ichi\
 two(ichi)			/* { dg-error "not enough" } */
 var0()				/* OK.  */
 var0(ichi)			/* OK.  */
-var1()				/* { dg-error "not enough" } */
-var1(ichi)			/* { dg-error "not enough" } */
+var1()				/* { dg-warning "rest arguments to be used" } */
+var1(ichi)			/* { dg-warning "rest arguments to be used" } */
 var1(ichi, ni)			/* OK.  */
 
-/* This tests two deprecated oddities of GNU rest args - omitting a
-   comma is OK, and backtracking a token on pasting an empty rest
-   args.  */
+/* This tests two oddities of GNU rest args - omitting a comma is OK,
+   and backtracking a token on pasting an empty rest args.  */
 #define rest(x, y...) x ## y	/* { dg-warning "ISO C" } */
-rest(ichi,)			/* { dg-warning "deprecated" } */
-rest(ichi)			/* { dg-warning "deprecated" } */
+rest(ichi,)			/* OK.  */
+rest(ichi)			/* { dg-warning "rest arguments to be used" } */
 #if 23 != rest(2, 3)		/* OK, no warning.  */
 #error 23 != 23 !!
 #endif
===================================================================
Index: testsuite/gcc.dg/cpp/paste6.c
--- testsuite/gcc.dg/cpp/paste6.c	Tue May  5 13:32:27 1998
+++ testsuite/gcc.dg/cpp/paste6.c	Thu Jul 20 10:46:49 2000
@@ -0,0 +1,12 @@
+/* Regression test for paste appearing at the beginning of a set of
+   actual arguments.  Original bug exposed by Linux kernel.  Problem
+   reported by Jakub Jelinek <jakub@redhat.com>.  */
+
+/* { dg-do compile } */
+
+extern int foo(int x);
+
+#define bar(x) foo(x)
+#define baz(x) bar(##x)  /* { dg-warning "nothing can be pasted" } */
+
+int quux(int y) { return baz(y); }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]