This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]

First lexer tests, plus small trigraph bugfix

To: gcc-patches at gcc dot gnu dot org
Subject: First lexer tests, plus small trigraph bugfix
From: Neil Booth <NeilB at earthling dot net>
Date: Tue, 4 Jul 2000 23:47:04 +0900
Cc: Zack Weinberg <zack at wolery dot cumb dot org>

The first batch of lexer tests, and a one-line fix to a bug exposed by
one of them <g>.  They try to fully test lexing of identifiers,
numbers and (wide) string and character literals.

Some tests coming up soon will cover trigraphs not being replaced when
-trigraphs is not specified, as well as comments, digraphs, and
almost-backslash-newlines.  That's all I can think of.

If anyone has good ideas on how to test whether a preprocessing number
like "5.4.e+" is being treated as a single preprocessing token by the
lexer, please let me know.  I can't think of a way to test this.

Bootstrapping on i386 Linux.  OK?

Neil.

	* cpplex.c (trigraph_replace): Fix trigraph replacement within strings.
	* gcc.dg/cpp/lexident.c, gcc.dg/cpp/lexnum.c,
	  gcc.dg/cpp/lexstrng.c: New tests.

Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.59
diff -u -p -r1.59 cpplex.c
--- cpplex.c	2000/07/04 01:58:20	1.59
+++ cpplex.c	2000/07/04 14:35:04
@@ -853,7 +853,7 @@ trigraph_replace (pfile, src, limit)
   /* Starting with src[1], find two consecutive '?'.  The case of no
      trigraphs is streamlined.  */
   
-  for (; src + 1 < limit; src += 2)
+  for (src++; src + 1 < limit; src += 2)
     {
       if (src[0] != '?')
 	continue;
Index: testsuite/gcc.dg/cpp/lexident.c
===================================================================
RCS file: lexident.c
diff -N lexident.c
--- /dev/null	Tue May  5 13:32:27 1998
+++ lexident.c	Tue Jul  4 07:35:40 2000
@@ -0,0 +1,25 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.  */
+
+/* { dg-do preprocess } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of identifiers.  */
+
+/* Escaped newlines, _ and $ in identifiers.  */
+#def\
+\
+ine foo_
+
+#d\
+ef??/
+in\
+e b\
+a$r
+
+#ifndef foo_
+#error foo_
+#endif
+
+#ifndef ba$r
+#error ba$r
+#endif
Index: testsuite/gcc.dg/cpp/lexnum.c
===================================================================
RCS file: lexnum.c
diff -N lexnum.c
--- /dev/null	Tue May  5 13:32:27 1998
+++ lexnum.c	Tue Jul  4 07:35:40 2000
@@ -0,0 +1,50 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.  */
+
+/* { dg-do run } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of numbers.  */
+
+extern int puts (const char *);
+extern void abort (void);
+#define err(str) do { puts(str); abort(); } while (0)
+
+/* Escaped newlines.  */
+#define foo 12\
+3\
+\
+4??/
+5
+
+#if foo != 12345
+#error foo
+#endif
+
+int main (int argc, char *argv[])
+{
+  double a = 5.;
+  double x = .5;
+
+/* Decimal points, including initially and immediately before and
+   after an escaped newline.  */
+  if (a != 5)
+    err ("a");
+  if (x != .\
+5)
+    err ("x != .5");
+  x = 25\
+.\
+6;
+  if (x != 25.6)
+    err ("x != 25.6");
+
+  /* Test exponentials and their signs.  A buggy lexer is more likely
+     to fail the compile, but never mind.  */
+  if (250 != 25e+1 || 250 != 25e1 || 250 != 2500e-1)
+    err ("exponentials");
+
+  /* Todo: p exponentials, and how to test preprocessing number
+     tokenisation?  */
+
+  return 0;
+}
Index: testsuite/gcc.dg/cpp/lexstrng.c
===================================================================
RCS file: lexstrng.c
diff -N lexstrng.c
--- /dev/null	Tue May  5 13:32:27 1998
+++ lexstrng.c	Tue Jul  4 07:35:40 2000
@@ -0,0 +1,69 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.  */
+
+/* { dg-do run } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of strings and character constants.  */
+
+#include <string.h>
+
+#ifndef __WCHAR_TYPE__
+#define __WCHAR_TYPE__ int
+#endif
+typedef __WCHAR_TYPE__ wchar_t;
+
+extern int strcmp (const char *, const char *);
+extern int puts (const char *);
+extern void abort (void);
+#define err(str) do { puts(str); abort(); } while (0)
+
+/* Escaped newlines.  */
+#const char *str1 = "s\
+t\
+\
+r??/
+  1";
+
+const char x = '\
+??/
+b';
+
+/* Test escaped terminators.  */
+const char *term = "\"\\\"\\";
+const char termc = '\'';
+const char *terms = "'";
+
+/* Test wide strings and chars are lexed.  */
+const wchar_t wchar = L'wc';
+const wchar_t wstring = L"wide string";
+
+/* Test all 9 trigraphs embedded in a string.  Test trigraphs do not
+   survive an embedded backslash newline.  Test trigraphs preceded by
+   a '?' are still noticed.  */
+const char *t = "??/\??<??>??=??)??\
+(??(??!??'??-???=???/
+?-";
+
+int main (int argc, char *argv[])
+{
+  if (strcmp (str1, "str  1"))
+    err ("str1");
+
+  if (x != 'b')
+    err ("b");
+
+  /* We have to split the string up to avoid trigraph replacement
+     here.  Split the 2 trigraphs after both 1 and 2 ?s; just doing
+     this exposed a bug in the initial release of the tokenized lexer.  */
+  if (strcmp (t, "\\{}#]?" "?([|^~?#??" "-"))
+    err ("Embedded trigraphs");
+
+  if (term[0] != '"' || term[1] != '\\' || term[2] != '"'
+      || term[3] != '\\' || term[4] != '\0')
+    err ("Escaped string terminators");
+
+  if (termc != terms[0])
+    err ("Escaped character constant terminator");
+
+  return 0;
+}

Follow-Ups:
- Re: First lexer tests, plus small trigraph bugfix
  - From: Neil Booth
- Re: First lexer tests, plus small trigraph bugfix
  - From: Per Bothner

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]