This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
First lexer tests, plus small trigraph bugfix
- To: gcc-patches at gcc dot gnu dot org
- Subject: First lexer tests, plus small trigraph bugfix
- From: Neil Booth <NeilB at earthling dot net>
- Date: Tue, 4 Jul 2000 23:47:04 +0900
- Cc: Zack Weinberg <zack at wolery dot cumb dot org>
The first batch of lexer tests, and a one-line fix to a bug exposed by
one of them <g>. They try to fully test lexing of identifiers,
numbers and (wide) string and character literals.
Some tests coming up soon will cover leaving trigraphs unreplaced when
-trigraphs is not specified, as well as comments, digraphs, and
almost-backslash-newlines. That's all I can think of.
If anyone has good ideas on how to test whether a preprocessing number
like "5.4.e+" is being treated as a single preprocessing token by the
lexer, please let me know. I can't think of a way to test this.
Bootstrapping on i386 linux. OK?
Neil.
* cpplex.c: Fix trigraph replacement within strings.
* gcc.dg/cpp/lexident.c, gcc.dg/cpp/lexnum.c,
gcc.dg/cpp/lexstrng.c: New tests.
Index: cpplex.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/cpplex.c,v
retrieving revision 1.59
diff -u -p -r1.59 cpplex.c
--- cpplex.c 2000/07/04 01:58:20 1.59
+++ cpplex.c 2000/07/04 14:35:04
@@ -853,7 +853,7 @@ trigraph_replace (pfile, src, limit)
/* Starting with src[1], find two consecutive '?'. The case of no
trigraphs is streamlined. */
- for (; src + 1 < limit; src += 2)
+ for (src++; src + 1 < limit; src += 2)
{
if (src[0] != '?')
continue;
Index: testsuite/gcc.dg/cpp/lexident.c
===================================================================
RCS file: lexident.c
diff -N lexident.c
--- /dev/null Tue May 5 13:32:27 1998
+++ lexident.c Tue Jul 4 07:35:40 2000
@@ -0,0 +1,25 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc. */
+
+/* { dg-do preprocess } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of identifiers. */
+
+/* Escaped newlines, _ and $ in identifiers. */
+#def\
+\
+ine foo_
+
+#d\
+ef??/
+in\
+e b\
+a$r
+
+#ifndef foo_
+#error foo_
+#endif
+
+#ifndef ba$r
+#error ba$r
+#endif
Index: testsuite/gcc.dg/cpp/lexnum.c
===================================================================
RCS file: lexnum.c
diff -N lexnum.c
--- /dev/null Tue May 5 13:32:27 1998
+++ lexnum.c Tue Jul 4 07:35:40 2000
@@ -0,0 +1,50 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc. */
+
+/* { dg-do run } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of numbers. */
+
+extern int puts (const char *);
+extern void abort (void);
+#define err(str) do { puts(str); abort(); } while (0)
+
+/* Escaped newlines. */
+#define foo 12\
+3\
+\
+4??/
+5
+
+#if foo != 12345
+#error foo
+#endif
+
+int main (int argc, char *argv[])
+{
+ double a = 5.;
+ double x = .5;
+
+/* Decimal points, including initially and immediately before and
+ after an escaped newline. */
+ if (a != 5)
+ err ("a");
+ if (x != .\
+5)
+ err ("x != .5");
+ x = 25\
+.\
+6;
+ if (x != 25.6)
+ err ("x != 25.6");
+
+ /* Test exponentials and their signs. A buggy lexer is more likely
+ to fail the compile, but never mind. */
+ if (250 != 25e+1 || 250 != 25e1 || 250 != 2500e-1)
+ err ("exponentials");
+
+ /* Todo: p exponentials, and how to test preprocessing number
+ tokenisation? */
+
+ return 0;
+}
Index: testsuite/gcc.dg/cpp/lexstrng.c
===================================================================
RCS file: lexstrng.c
diff -N lexstrng.c
--- /dev/null Tue May 5 13:32:27 1998
+++ lexstrng.c Tue Jul 4 07:35:40 2000
@@ -0,0 +1,69 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc. */
+
+/* { dg-do run } */
+/* { dg-options "-trigraphs" } */
+
+/* Test lexing of strings and character constants. */
+
+#include <string.h>
+
+#ifndef __WCHAR_TYPE__
+#define __WCHAR_TYPE__ int
+#endif
+typedef __WCHAR_TYPE__ wchar_t;
+
+extern int strcmp (const char *, const char *);
+extern int puts (const char *);
+extern void abort (void);
+#define err(str) do { puts(str); abort(); } while (0)
+
+/* Escaped newlines, including one whose backslash is a trigraph. */
+const char *str1 = "s\
+t\
+\
+r??/
+ 1";
+
+const char x = '\
+??/
+b';
+
+/* Test escaped terminators. */
+const char *term = "\"\\\"\\";
+const char termc = '\'';
+const char *terms = "'";
+
+/* Test wide strings and chars are lexed. */
+const wchar_t wchar = L'wc';
+const wchar_t *wstring = L"wide string";
+
+/* Test all 9 trigraphs embedded in a string. Test trigraphs do not
+ survive an embedded backslash newline. Test trigraphs preceded by
+ a '?' are still noticed. */
+const char *t = "??/\??<??>??=??)??\
+(??(??!??'??-???=???/
+?-";
+
+int main (int argc, char *argv[])
+{
+ if (strcmp (str1, "str 1"))
+ err ("str1");
+
+ if (x != 'b')
+ err ("b");
+
+ /* We have to split the string up to avoid trigraph replacement
+ here. Split the 2 trigraphs after both 1 and 2 ?s; just doing
+ this exposed a bug in the initial release of the tokenized lexer. */
+ if (strcmp (t, "\\{}#]?" "?([|^~?#??" "-"))
+ err ("Embedded trigraphs");
+
+ if (term[0] != '"' || term[1] != '\\' || term[2] != '"'
+ || term[3] != '\\' || term[4] != '\0')
+ err ("Escaped string terminators");
+
+ if (termc != terms[0])
+ err ("Escaped character constant terminator");
+
+ return 0;
+}