[gcc.git] / gcc / d / dmd / lexer.d

/**
 * Implements the lexical analyzer, which converts source code into lexical tokens.
 *
 * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
 *
 * Copyright:   Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
 * Documentation:  https://dlang.org/phobos/dmd_lexer.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
 */

module dmd.lexer;

import core.stdc.ctype;
import core.stdc.errno;
import core.stdc.stdarg;
import core.stdc.stdio;
import core.stdc.stdlib : getenv;
import core.stdc.string;
import core.stdc.time;

import dmd.entity;
import dmd.errors;
import dmd.globals;
import dmd.id;
import dmd.identifier;
import dmd.root.array;
import dmd.root.ctfloat;
import dmd.common.outbuffer;
import dmd.root.port;
import dmd.root.rmem;
import dmd.root.string;
import dmd.root.utf;
import dmd.tokens;
import dmd.utils;

nothrow:

// When the DMDLIB version identifier is set, also set the `LocOffset`
// version identifier for this module.
version (DMDLIB)
{
    version = LocOffset;
}

/***********************************************************
 * The lexical analyzer: turns the source text it is given into `Token`s.
 */
class Lexer
{
    // `__gshared`: a single buffer shared by every Lexer instance
    private __gshared OutBuffer stringbuffer;

    Loc scanloc;            // for error messages
    Loc prevloc;            // location of token before current (set by nextToken())

    const(char)* p;         // current character

    Token token;            // current token; nextToken() refills it and returns its value

    // For ImportC
    bool Ccompile;              /// true if compiling ImportC

    // The following are valid only if (Ccompile == true)
    ubyte boolsize;             /// size of a C _Bool, default 1
    ubyte shortsize;            /// size of a C short, default 2
    ubyte intsize;              /// size of a C int, default 4
    ubyte longsize;             /// size of C long, 4 or 8
    ubyte long_longsize;        /// size of a C long long, default 8
    ubyte long_doublesize;      /// size of C long double, 8 or D real.sizeof
    ubyte wchar_tsize;          /// size of C wchar_t, 2 or 4

    private
    {
        const(char)* base;      // pointer to start of buffer
        const(char)* end;       // pointer to last element of buffer, i.e. the 0 / 0x1A terminator
        const(char)* line;      // start of current line

        bool doDocComment;      // collect doc comment information
        bool anyToken;          // seen at least one token
        bool commentToken;      // comments are TOK.comment's
        bool tokenizeNewlines;  // newlines are turned into TOK.endOfLine's;
                                // scan() clears this after returning one TOK.endOfLine

        version (DMDLIB)
        {
            bool whitespaceToken;   // tokenize whitespaces
        }

        int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
        int lastDocLine;        // last line of previous doc comment

        // Head of a singly linked list (through Token.next) of tokens handed
        // back via releaseToken(); allocateToken() reuses them before allocating.
        Token* tokenFreelist;
    }

  nothrow:

    /*********************
     * Constructs a Lexer over the source text base[begoffset..endoffset+1].
     * The final character, base[endoffset], must be the terminator:
     * null (0) or EOF (0x1A).
     *
     * If the text starts with `#!`, that whole first line is skipped before
     * any token is produced.
     *
     * Params:
     *  filename = used for error messages
     *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
     *  begoffset = starting offset into base[]
     *  endoffset = the last offset to read into base[]
     *  doDocComment = handle documentation comments
     *  commentToken = comments become TOK.comment's
     */
    this(const(char)* filename, const(char)* base, size_t begoffset,
        size_t endoffset, bool doDocComment, bool commentToken) pure
    {
        // Buffer bounds and scan position.
        this.base = base;
        this.end = base + endoffset;
        this.p = base + begoffset;
        this.line = this.p;

        // Location tracking starts at line 1, column 1 of `filename`.
        this.scanloc = Loc(filename, 1, 1);
        this.token = Token.init;

        // Options and per-scan state.
        this.doDocComment = doDocComment;
        this.commentToken = commentToken;
        this.tokenizeNewlines = false;
        this.inTokenStringConstant = 0;
        this.lastDocLine = 0;

        // Nothing more to do unless the text opens with a `#!` line.
        if (p is null || p[0] != '#' || p[1] != '!')
            return;

        /* Skip the rest of the `#!` line.
         */
        p += 2;
        for (;;)
        {
            const char c = *p;
            if (c == 0 || c == 0x1A)
                break;          // stay on the terminator
            ++p;
            if (c == '\n')
                break;          // p is now just past the newline
        }
        endOfLine();
    }

    /******************
     * Used for unittests for a mock Lexer.
     * The body is empty, so every field keeps its default (`.init`) value.
     */
    this() { }

    /**************************************
     * Point the lexer at `slice` so that #define lines can be lexed from it.
     * Newline tokenization is switched on and the location is reset to
     * line 1, column 1 of the pseudo file "#defines".
     *
     * Params:
     *  slice = text to lex; the character just past it,
     *          slice.ptr[slice.length], must be 0
     */
    final void resetDefineLines(const(char)[] slice)
    {
        const(char)* start = slice.ptr;

        base = start;
        end = start + slice.length;
        assert(*end == 0);

        // Scanning begins at the start of the slice, which is also the
        // start of the current line.
        p = line = start;

        scanloc = Loc("#defines", 1, 1);
        lastDocLine = 0;
        inTokenStringConstant = 0;
        tokenizeNewlines = true;
    }

    /**********************************
     * Set up for next #define line.
     * p should be at start of next line.
     *
     * This re-arms `tokenizeNewlines`, which scan() clears each time it
     * returns a TOK.endOfLine.
     */
    final void nextDefineLine()
    {
        tokenizeNewlines = true;
    }

    version (DMDLIB)
    {
        /**
         * Library-build constructor: does everything the six-argument
         * constructor does and additionally records the whitespace option.
         *
         * Params:
         *  filename = used for error messages
         *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
         *  begoffset = starting offset into base[]
         *  endoffset = the last offset to read into base[]
         *  doDocComment = handle documentation comments
         *  commentToken = comments become TOK.comment's
         *  whitespaceToken = whitespace becomes TOK.whitespace's
         */
        this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset,
            bool doDocComment, bool commentToken, bool whitespaceToken)
        {
            this(filename, base, begoffset, endoffset, doDocComment, commentToken);
            this.whitespaceToken = whitespaceToken;
        }

        /// Range primitive: true once the current token is TOK.endOfFile.
        bool empty() const pure @property @nogc @safe
        {
            const TOK current = front();
            return current == TOK.endOfFile;
        }

        /// Range primitive: the value (kind) of the current token.
        TOK front() const pure @property @nogc @safe
        {
            return token.value;
        }

        /// Range primitive: advance to the next token; its value is discarded here.
        void popFront()
        {
            nextToken();
        }
    }

    /// Returns: a `Token` detached from any list (`next` is null when it comes
    /// from the freelist): recycled if the freelist is non-empty, otherwise
    /// newly allocated.
    Token* allocateToken() pure nothrow @safe
    {
        Token* recycled = tokenFreelist;
        if (recycled is null)
            return new Token();

        // Pop the head of the freelist and hand it out unlinked.
        tokenFreelist = recycled.next;
        recycled.next = null;
        return recycled;
    }

    /// Hands `token` back for reuse by pushing it onto the front of the freelist.
    /// When `mem.isGCEnabled` is true its contents are first reset to `Token.init`.
    private void releaseToken(Token* token) pure nothrow @nogc @safe
    {
        // Note: the parameter shadows the `token` field; only the parameter is used here.
        Token* freed = token;

        if (mem.isGCEnabled)
        {
            *freed = Token.init;
        }

        freed.next = tokenFreelist;
        tokenFreelist = freed;
    }

    /***********************
     * Advance to the next token.
     * The location of the token being left is saved in `prevloc`. If a token
     * is already chained on `token.next`, it is copied into `token` and its
     * storage is released; otherwise a new token is scanned from the source.
     * Returns: the value of the new current token
     */
    final TOK nextToken()
    {
        prevloc = token.loc;

        Token* queued = token.next;
        if (queued is null)
        {
            // Nothing chained: lex straight into the current token.
            scan(&token);
        }
        else
        {
            // The copy also brings over queued.next, so any further chained
            // tokens remain linked from `token`.
            memcpy(&token, queued, Token.sizeof);
            releaseToken(queued);
        }

        return token.value;
    }

    /***********************
     * Peek at the value of the token that follows the current one.
     * Returns: that token's value
     */
    final TOK peekNext()
    {
        Token* ahead = peek(&token);
        return ahead.value;
    }

    /***********************
     * Peek at the value of the token two positions after the current one.
     * Returns: that token's value
     */
    final TOK peekNext2()
    {
        return peek(peek(&token)).value;
    }

    /****************************
     * Turn next token in buffer into a token.
     * Params:
     *  t = the token to set the resulting Token to
     */
    final void scan(Token* t)
    {
        const lastLine = scanloc.linnum;
        Loc startLoc;
        t.blockComment = null;
        t.lineComment = null;

        while (1)
        {
            t.ptr = p;
            //printf("p = %p, *p = '%c'\n",p,*p);
            t.loc = loc();
            switch (*p)
            {
            case 0:
            case 0x1A:
                t.value = TOK.endOfFile; // end of file
                // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
                return;
            case ' ':
                // Skip 4 spaces at a time after aligning 'p' to a 4-byte boundary.
                while ((cast(size_t)p) % uint.sizeof)
                {
                    if (*p != ' ')
                        goto LendSkipFourSpaces;
                    p++;
                }
                while (*(cast(uint*)p) == 0x20202020) // ' ' == 0x20
                    p += 4;
                // Skip over any remaining space on the line.
                while (*p == ' ')
                    p++;
            LendSkipFourSpaces:
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\t':
            case '\v':
            case '\f':
                p++;
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\r':
                p++;
                if (*p != '\n') // if CR stands by itself
                {
                    endOfLine();
                    if (tokenizeNewlines)
                    {
                        t.value = TOK.endOfLine;
                        tokenizeNewlines = false;
                        return;
                    }
                }
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\n':
                p++;
                endOfLine();
                if (tokenizeNewlines)
                {
                    t.value = TOK.endOfLine;
                    tokenizeNewlines = false;
                    return;
                }
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '0':
                if (!isZeroSecond(p[1]))        // if numeric literal does not continue
                {
                    ++p;
                    t.unsvalue = 0;
                    t.value = TOK.int32Literal;
                    return;
                }
                goto Lnumber;

            case '1': .. case '9':
                if (!isDigitSecond(p[1]))       // if numeric literal does not continue
                {
                    t.unsvalue = *p - '0';
                    ++p;
                    t.value = TOK.int32Literal;
                    return;
                }
            Lnumber:
                t.value = number(t);
                return;

            case '\'':
                if (issinglechar(p[1]) && p[2] == '\'')
                {
                    t.unsvalue = p[1];        // simple one character literal
                    t.value = TOK.charLiteral;
                    p += 3;
                }
                else if (Ccompile)
                {
                    clexerCharConstant(*t, 0);
                }
                else
                {
                    t.value = charConstant(t);
                }
                return;

            case 'u':
            case 'U':
            case 'L':
                if (!Ccompile)
                    goto case_ident;
                if (p[1] == '\'')       // C wide character constant
                {
                    char c = *p;
                    if (c == 'L')       // convert L to u or U
                        c = (wchar_tsize == 4) ? 'u' : 'U';
                    ++p;
                    clexerCharConstant(*t, c);
                    return;
                }
                else if (p[1] == '\"')  // C wide string literal
                {
                    const c = *p;
                    ++p;
                    escapeStringConstant(t);
                    t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') :
                                c == 'u' ? 'w' :
                                'd';
                    return;
                }
                else if (p[1] == '8' && p[2] == '\"') // C UTF-8 string literal
                {
                    p += 2;
                    escapeStringConstant(t);
                    return;
                }
                goto case_ident;

            case 'r':
                if (Ccompile || p[1] != '"')
                    goto case_ident;
                p++;
                goto case '`';
            case '`':
                if (Ccompile)
                    goto default;
                wysiwygStringConstant(t);
                return;
            case 'q':
                if (Ccompile)
                    goto case_ident;
                if (p[1] == '"')
                {
                    p++;
                    delimitedStringConstant(t);
                    return;
                }
                else if (p[1] == '{')
                {
                    p++;
                    tokenStringConstant(t);
                    return;
                }
                else
                    goto case_ident;
            case '"':
                escapeStringConstant(t);
                return;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'g':
            case 'h':
            case 'i':
            case 'j':
            case 'k':
            case 'l':
            case 'm':
            case 'n':
            case 'o':
            case 'p':
                /*case 'q': case 'r':*/
            case 's':
            case 't':
            //case 'u':
            case 'v':
            case 'w':
            case 'x':
            case 'y':
            case 'z':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
            case 'G':
            case 'H':
            case 'I':
            case 'J':
            case 'K':
            //case 'L':
            case 'M':
            case 'N':
            case 'O':
            case 'P':
            case 'Q':
            case 'R':
            case 'S':
            case 'T':
            //case 'U':
            case 'V':
            case 'W':
            case 'X':
            case 'Y':
            case 'Z':
            case '_':
            case_ident:
                {
                    while (1)
                    {
                        const c = *++p;
                        if (isidchar(c))
                            continue;
                        else if (c & 0x80)
                        {
                            const s = p;
                            const u = decodeUTF();
                            if (isUniAlpha(u))
                                continue;
                            error("char 0x%04x not allowed in identifier", u);
                            p = s;
                        }
                        break;
                    }
                    Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
                    t.ident = id;
                    t.value = cast(TOK)id.getValue();

                    anyToken = 1;

                    /* Different keywords for C and D
                     */
                    if (Ccompile)
                    {
                        if (t.value != TOK.identifier)
                        {
                            t.value = Ckeywords[t.value];  // filter out D keywords
                        }
                    }
                    else if (t.value >= FirstCKeyword)
                        t.value = TOK.identifier;       // filter out C keywords

                    else if (*t.ptr == '_') // if special identifier token
                    {
                        // Lazy initialization
                        TimeStampInfo.initialize(t.loc);

                        if (id == Id.DATE)
                        {
                            t.ustring = TimeStampInfo.date.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIME)
                        {
                            t.ustring = TimeStampInfo.time.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.VENDOR)
                        {
                            t.ustring = global.vendor.xarraydup.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIMESTAMP)
                        {
                            t.ustring = TimeStampInfo.timestamp.ptr;
                        Lstr:
                            t.value = TOK.string_;
                            t.postfix = 0;
                            t.len = cast(uint)strlen(t.ustring);
                        }
                        else if (id == Id.VERSIONX)
                        {
                            t.value = TOK.int64Literal;
                            t.unsvalue = global.versionNumber();
                        }
                        else if (id == Id.EOFX)
                        {
                            t.value = TOK.endOfFile;
                            // Advance scanner to end of file
                            while (!(*p == 0 || *p == 0x1A))
                                p++;
                        }
                    }
                    //printf("t.value = %d\n",t.value);
                    return;
                }
            case '/':
                p++;
                switch (*p)
                {
                case '=':
                    p++;
                    t.value = TOK.divAssign;
                    return;
                case '*':
                    p++;
                    startLoc = loc();
                    while (1)
                    {
                        while (1)
                        {
                            const c = *p;
                            switch (c)
                            {
                            case '/':
                                break;
                            case '\n':
                                endOfLine();
                                p++;
                                continue;
                            case '\r':
                                p++;
                                if (*p != '\n')
                                    endOfLine();
                                continue;
                            case 0:
                            case 0x1A:
                                error("unterminated /* */ comment");
                                p = end;
                                t.loc = loc();
                                t.value = TOK.endOfFile;
                                return;
                            default:
                                if (c & 0x80)
                                {
                                    const u = decodeUTF();
                                    if (u == PS || u == LS)
                                        endOfLine();
                                }
                                p++;
                                continue;
                            }
                            break;
                        }
                        p++;
                        if (p[-2] == '*' && p - 3 != t.ptr)
                            break;
                    }
                    if (commentToken)
                    {
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
                    {
                        // if /** but not /**/
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    continue;
                case '/': // do // style comments
                    startLoc = loc();
                    while (1)
                    {
                        const c = *++p;
                        switch (c)
                        {
                        case '\n':
                            break;
                        case '\r':
                            if (p[1] == '\n')
                                p++;
                            break;
                        case 0:
                        case 0x1A:
                            if (commentToken)
                            {
                                p = end;
                                t.loc = startLoc;
                                t.value = TOK.comment;
                                return;
                            }
                            if (doDocComment && t.ptr[2] == '/')
                            {
                                getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                                lastDocLine = scanloc.linnum;
                            }
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                const u = decodeUTF();
                                if (u == PS || u == LS)
                                    break;
                            }
                            continue;
                        }
                        break;
                    }
                    if (commentToken)
                    {
                        version (DMDLIB) {}
                        else
                        {
                            p++;
                            endOfLine();
                        }
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    if (doDocComment && t.ptr[2] == '/')
                    {
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    p++;
                    endOfLine();
                    continue;
                case '+':
                    if (!Ccompile)
                    {
                        int nest;
                        startLoc = loc();
                        p++;
                        nest = 1;
                        while (1)
                        {
                            char c = *p;
                            switch (c)
                            {
                            case '/':
                                p++;
                                if (*p == '+')
                                {
                                    p++;
                                    nest++;
                                }
                                continue;
                            case '+':
                                p++;
                                if (*p == '/')
                                {
                                    p++;
                                    if (--nest == 0)
                                        break;
                                }
                                continue;
                            case '\r':
                                p++;
                                if (*p != '\n')
                                    endOfLine();
                                continue;
                            case '\n':
                                endOfLine();
                                p++;
                                continue;
                            case 0:
                            case 0x1A:
                                error("unterminated /+ +/ comment");
                                p = end;
                                t.loc = loc();
                                t.value = TOK.endOfFile;
                                return;
                            default:
                                if (c & 0x80)
                                {
                                    uint u = decodeUTF();
                                    if (u == PS || u == LS)
                                        endOfLine();
                                }
                                p++;
                                continue;
                            }
                            break;
                        }
                        if (commentToken)
                        {
                            t.loc = startLoc;
                            t.value = TOK.comment;
                            return;
                        }
                        if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
                        {
                            // if /++ but not /++/
                            getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                            lastDocLine = scanloc.linnum;
                        }
                        continue;
                    }
                    break;
                default:
                    break;
                }
                t.value = TOK.div;
                return;
            case '.':
                p++;
                if (isdigit(*p))
                {
                    /* Note that we don't allow ._1 and ._ as being
                     * valid floating point numbers.
                     */
                    p--;
                    t.value = inreal(t);
                }
                else if (p[0] == '.')
                {
                    if (p[1] == '.')
                    {
                        p += 2;
                        t.value = TOK.dotDotDot;
                    }
                    else
                    {
                        p++;
                        t.value = TOK.slice;
                    }
                }
                else
                    t.value = TOK.dot;
                return;
            case '&':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.andAssign;
                }
                else if (*p == '&')
                {
                    p++;
                    t.value = TOK.andAnd;
                }
                else
                    t.value = TOK.and;
                return;
            case '|':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.orAssign;
                }
                else if (*p == '|')
                {
                    p++;
                    t.value = TOK.orOr;
                }
                else
                    t.value = TOK.or;
                return;
            case '-':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.minAssign;
                }
                else if (*p == '-')
                {
                    p++;
                    t.value = TOK.minusMinus;
                }
                else if (*p == '>')
                {
                    ++p;
                    t.value = TOK.arrow;
                }
                else
                    t.value = TOK.min;
                return;
            case '+':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.addAssign;
                }
                else if (*p == '+')
                {
                    p++;
                    t.value = TOK.plusPlus;
                }
                else
                    t.value = TOK.add;
                return;
            case '<':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.lessOrEqual; // <=
                }
                else if (*p == '<')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.leftShiftAssign; // <<=
                    }
                    else
                        t.value = TOK.leftShift; // <<
                }
                else if (*p == ':' && Ccompile)
                {
                    ++p;
                    t.value = TOK.leftBracket;  // <:
                }
                else if (*p == '%' && Ccompile)
                {
                    ++p;
                    t.value = TOK.leftCurly;    // <%
                }
                else
                    t.value = TOK.lessThan; // <
                return;
            case '>':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.greaterOrEqual; // >=
                }
                else if (*p == '>')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.rightShiftAssign; // >>=
                    }
                    else if (*p == '>')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.unsignedRightShiftAssign; // >>>=
                        }
                        else
                            t.value = TOK.unsignedRightShift; // >>>
                    }
                    else
                        t.value = TOK.rightShift; // >>
                }
                else
                    t.value = TOK.greaterThan; // >
                return;
            case '!':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.notEqual; // !=
                }
                else
                    t.value = TOK.not; // !
                return;
            case '=':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.equal; // ==
                }
                else if (*p == '>')
                {
                    p++;
                    t.value = TOK.goesTo; // =>
                }
                else
                    t.value = TOK.assign; // =
                return;
            case '~':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.concatenateAssign; // ~=
                }
                else
                    t.value = TOK.tilde; // ~
                return;
            case '^':
                p++;
                if (*p == '^')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.powAssign; // ^^=
                    }
                    else
                        t.value = TOK.pow; // ^^
                }
                else if (*p == '=')
                {
                    p++;
                    t.value = TOK.xorAssign; // ^=
                }
                else
                    t.value = TOK.xor; // ^
                return;
            case '(':
                p++;
                t.value = TOK.leftParenthesis;
                return;
            case ')':
                p++;
                t.value = TOK.rightParenthesis;
                return;
            case '[':
                p++;
                t.value = TOK.leftBracket;
                return;
            case ']':
                p++;
                t.value = TOK.rightBracket;
                return;
            case '{':
                p++;
                t.value = TOK.leftCurly;
                return;
            case '}':
                p++;
                t.value = TOK.rightCurly;
                return;
            case '?':
                p++;
                t.value = TOK.question;
                return;
            case ',':
                p++;
                t.value = TOK.comma;
                return;
            case ';':
                p++;
                t.value = TOK.semicolon;
                return;
            case ':':
                p++;
                if (*p == ':')
                {
                    ++p;
                    t.value = TOK.colonColon;
                }
                else if (*p == '>' && Ccompile)
                {
                    ++p;
                    t.value = TOK.rightBracket;
                }
                else
                    t.value = TOK.colon;
                return;
            case '$':
                p++;
                t.value = TOK.dollar;
                return;
            case '@':
                p++;
                t.value = TOK.at;
                return;
            case '*':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.mulAssign;
                }
                else
                    t.value = TOK.mul;
                return;
            case '%':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.modAssign;
                }
                else if (*p == '>' && Ccompile)
                {
                    ++p;
                    t.value = TOK.rightCurly;
                }
                else if (*p == ':' && Ccompile)
                {
                    goto case '#';      // %: means #
                }
                else
                    t.value = TOK.mod;
                return;
            case '#':
                {
                    // https://issues.dlang.org/show_bug.cgi?id=22825
                    // Special token sequences are terminated by newlines,
                    // and should not be skipped over.
                    this.tokenizeNewlines = true;
                    p++;
                    if (parseSpecialTokenSequence())
                        continue;
                    t.value = TOK.pound;
                    return;
                }
            default:
                {
                    dchar c = *p;
                    if (c & 0x80)
                    {
                        c = decodeUTF();
                        // Check for start of unicode identifier
                        if (isUniAlpha(c))
                            goto case_ident;
                        if (c == PS || c == LS)
                        {
                            endOfLine();
                            p++;
                            if (tokenizeNewlines)
                            {
                                t.value = TOK.endOfLine;
                                tokenizeNewlines = false;
                                return;
                            }
                            continue;
                        }
                    }
                    if (c < 0x80 && isprint(c))
                        error("character '%c' is not a valid token", c);
                    else
                        error("character 0x%02x is not a valid token", c);
                    p++;
                    continue;
                }
            }
        }
    }

    /*********************************
     * Look one token ahead of `ct` without consuming anything.
     * Params:
     *  ct = token to look past
     * Returns:
     *  the token following `ct`. The first request scans it into a newly
     *  allocated token and links it to `ct.next`; later requests return
     *  that same linked token.
     */
    final Token* peek(Token* ct)
    {
        if (!ct.next)
        {
            // nothing buffered yet: scan the following token and remember it
            Token* ahead = allocateToken();
            scan(ahead);
            ct.next = ahead;
        }
        return ct.next;
    }

    /*********************************
     * Look ahead past a parenthesized group.
     * Params:
     *  tk = token sitting on the opening `(`
     * Returns:
     *  the token following the matching `)`. The search gives up early, and
     *  returns the token it stopped on, when it meets a `}` with no matching
     *  `{` seen since `tk`, a `;` that is not inside curly braces, or the
     *  end of the file.
     */
    final Token* peekPastParen(Token* tk)
    {
        //printf("peekPastParen()\n");
        int parenDepth = 1;     // the opening ( that tk is on
        int braceDepth = 0;
        for (;;)
        {
            tk = peek(tk);
            //tk.print();
            const kind = tk.value;
            if (kind == TOK.leftParenthesis)
            {
                ++parenDepth;
            }
            else if (kind == TOK.rightParenthesis)
            {
                // the matching ) has been reached: answer is the token after it
                if (--parenDepth == 0)
                    return peek(tk);
            }
            else if (kind == TOK.leftCurly)
            {
                ++braceDepth;
            }
            else if (kind == TOK.rightCurly)
            {
                // a } that closes nothing opened inside the parentheses
                if (--braceDepth < 0)
                    return tk;
            }
            else if (kind == TOK.semicolon)
            {
                if (braceDepth == 0)
                    return tk;
            }
            else if (kind == TOK.endOfFile)
            {
                return tk;
            }
        }
    }

    /*******************************************
     * Decode the escape sequence found at the lexer's current position.
     * `p` is handed over by reference, so it is advanced past the sequence;
     * diagnostics are reported at the location of the current token.
     * Returns:
     *  the value of the escape sequence
     */
    private uint escapeSequence()
    {
        const dchar decoded = Lexer.escapeSequence(token.loc, p, Ccompile);
        return decoded;
    }

    /********
     * Decode one string literal escape sequence into the character it denotes.
     * D https://dlang.org/spec/lex.html#escape_sequences
     * C11 6.4.4.4
     * Params:
     *  loc = location reported with any diagnostic
     *  sequence = points at the character that selects the escape (the one
     *             following the backslash). On return it has been moved past
     *             the end of the escape sequence
     *  Ccompile = true to apply the C11 rules rather than the D rules
     * Returns:
     *  the escape sequence as a single character
     */
    private dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile)
    {
        const(char)* p = sequence;  // advance a local cursor ...
        scope(exit) sequence = p;   // ... and hand it back on every exit path

        uint c = *p;
        switch (c)
        {
        case '\'', '"', '?', '\\':
            // these stand for themselves
            p++;
            break;

        // single letter escapes
        case 'a': c = 7;  p++; break;
        case 'b': c = 8;  p++; break;
        case 't': c = 9;  p++; break;
        case 'n': c = 10; p++; break;
        case 'v': c = 11; p++; break;
        case 'f': c = 12; p++; break;
        case 'r': c = 13; p++; break;

        case 'u', 'U', 'x':
        {
            // how many hex digits this kind of escape asks for
            const int ndigits = (c == 'u') ? 4 : (c == 'U') ? 8 : 2;
            p++;
            c = *p;
            if (!ishex(cast(char)c))
            {
                // `sequence` has not been updated yet, so sequence[0] is still u/U/x
                error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
                p++;
                break;
            }

            uint v = 0;
            if (Ccompile && ndigits == 2)
            {
                /* C11 6.4.4.4-7: \x takes one or more hex digits, with no upper limit
                 */
                do
                {
                    const uint digit = isdigit(cast(char)c) ? c - '0'
                                     : islower(c) ? c - ('a' - 10)
                                     : c - ('A' - 10);
                    v = v * 16 + digit;
                    c = *++p;
                } while (ishex(cast(char)c));
            }
            else
            {
                int n = 0;      // digits consumed so far
                for (;;)
                {
                    const uint digit = isdigit(cast(char)c) ? c - '0'
                                     : islower(c) ? c - ('a' - 10)
                                     : c - ('A' - 10);
                    v = v * 16 + digit;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(char)c))
                    {
                        error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \u and \U must name a valid code point; \x is not checked
                if (ndigits != 2 && !utf_isValidDchar(v))
                {
                    error(loc, "invalid UTF character \\U%08x", v);
                    v = '?'; // recover with valid UTF character
                }
            }
            c = v;
            break;
        }

        case '&':
        {
            if (Ccompile)
                goto default;

            // named character entity: a letter, then letters or digits, up to `;`
            const idstart = ++p;
            while (isalpha(*p) || (p != idstart && isdigit(*p)))
                p++;
            if (*p == ';')
            {
                c = HtmlNamedEntity(idstart[0 .. p - idstart]);
                if (c == ~0)
                {
                    error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
                    c = '?';
                }
                p++;
            }
            else
            {
                // the offending character is included in the message and not consumed
                error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
                c = '?';
            }
            break;
        }

        case 0, 0x1A:
            // end of file: yield the backslash itself and consume nothing
            c = '\\';
            break;

        default:
            if (!isoctal(cast(char)c))
            {
                error(loc, "undefined escape sequence \\%c", c);
                p++;
            }
            else
            {
                // one to three octal digits
                uint v = 0;
                for (int n = 0; n < 3 && isoctal(cast(char)c); ++n)
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                }
                c = v;
                if (c > 0xFF)
                    error(loc, "escape octal sequence \\%03o is larger than \\377", c);
            }
            break;
        }
        return c;
    }

    /**
    Lex a wysiwyg ("what you see is what you get") string, in which no escape
    sequences are processed. On entry `p` points at the opening quote
    character; that same character (i.e. backtick or double-quote) ends the
    literal.

    Line endings are normalised: `\r\n` and a lone `\r` are both stored as `\n`.
    Params:
        result = pointer to the token that accepts the result
    */
    private void wysiwygStringConstant(Token* result)
    {
        result.value = TOK.string_;
        const begin = loc();
        const closing = *p++;   // remember the quote character and step past it
        stringbuffer.setsize(0);
        for (;;)
        {
            dchar ch = *p++;
            if (ch == '\n')
            {
                endOfLine();
            }
            else if (ch == '\r')
            {
                if (*p == '\n')
                    continue; // drop the \r, the \n that follows is handled next
                ch = '\n'; // a lone \r counts as an end of line
                endOfLine();
            }
            else if (ch == 0 || ch == 0x1A)
            {
                error("unterminated string constant starting at %s", begin.toChars());
                result.setString();
                // step back so that `p` is on the EOF character again
                p--;
                return;
            }
            else if (ch == closing)
            {
                result.setString(stringbuffer);
                stringPostfix(result);
                return;
            }
            else if (ch & 0x80)
            {
                // multi-byte sequence: decode it starting from its first byte
                p--;
                const u = decodeUTF();
                p++;
                if (u == PS || u == LS)
                    endOfLine();
                stringbuffer.writeUTF8(u);
                continue;
            }
            stringbuffer.writeByte(ch);
        }
    }

    /**
    Lex a delimited string. Some examples of delimited strings are:
    ---
    q"(foo(xxx))"      // "foo(xxx)"
    q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
    q"/foo]/"          // "foo]"
    q"HERE
    foo
    HERE"              // "foo\n"
    ---
    It is assumed that `p` points to the opening double-quote '"'.

    Three kinds of delimiter are handled:
    $(UL
      $(LI one of the bracket pairs `()`, `{}`, `[]`, `<>`, which may nest)
      $(LI an identifier, which starts a heredoc that ends at the same
           identifier found at the start of a line)
      $(LI any other single character, which is its own closing delimiter)
    )
    In every case the closing delimiter must be followed by `"`.
    Params:
        result = pointer to the token that accepts the result
    */
    private void delimitedStringConstant(Token* result)
    {
        result.value = TOK.string_;
        Loc start = loc();
        dchar delimleft = 0;            // opening delimiter; 0 until the first character has been read
        dchar delimright = 0;           // closing delimiter (not used for heredocs)
        uint nest = 1;                  // 1 if the delimiters are a bracket pair that can nest
        uint nestcount = ~0; // dead assignment, needed to suppress warning
        Identifier hereid = null;       // heredoc identifier, null if this is not a heredoc
        uint blankrol = 0;              // 1 while on the line that holds the opening heredoc identifier
        uint startline = 0;             // 1 if the previous character ended a line
        p++;                            // skip the opening "
        stringbuffer.setsize(0);
        while (1)
        {
            dchar c = *p++;
            //printf("c = '%c'\n", c);
            switch (c)
            {
            case '\n':
            Lnextline:
                endOfLine();
                startline = 1;
                if (blankrol)
                {
                    // end of the line carrying the heredoc identifier:
                    // this newline is not part of the string
                    blankrol = 0;
                    continue;
                }
                if (hereid)
                {
                    // inside a heredoc a newline is ordinary content; `continue`
                    // skips the code below so that startline stays set
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
            case '\r':
                if (*p == '\n')
                    continue; // ignore
                c = '\n'; // treat EndOfLine as \n character
                goto Lnextline;
            case 0:
            case 0x1A:
                error("unterminated delimited string constant starting at %s", start.toChars());
                result.setString();
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
                p--;
                return;
            default:
                if (c & 0x80)
                {
                    // multi-byte sequence: back up to its first byte and decode it
                    p--;
                    c = decodeUTF();
                    p++;
                    if (c == PS || c == LS)
                        goto Lnextline;
                }
                break;
            }
            if (delimleft == 0)
            {
                // first character after the quote: it decides the delimiter kind
                delimleft = c;
                nest = 1;
                nestcount = 1;
                if (c == '(')
                    delimright = ')';
                else if (c == '{')
                    delimright = '}';
                else if (c == '[')
                    delimright = ']';
                else if (c == '<')
                    delimright = '>';
                else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
                {
                    // Start of identifier; must be a heredoc
                    Token tok;
                    p--;                // rescan from the first character of the identifier
                    scan(&tok); // read in heredoc identifier
                    if (tok.value != TOK.identifier)
                    {
                        error("identifier expected for heredoc, not %s", tok.toChars());
                        delimright = c;
                    }
                    else
                    {
                        hereid = tok.ident;
                        //printf("hereid = '%s'\n", hereid.toChars());
                        blankrol = 1;
                    }
                    nest = 0;
                }
                else
                {
                    // any other character closes the string when it appears again
                    delimright = c;
                    nest = 0;
                    if (isspace(c))
                        error("delimiter cannot be whitespace");
                }
            }
            else
            {
                if (blankrol)
                {
                    // something other than a line end follows the heredoc identifier
                    error("heredoc rest of line should be blank");
                    blankrol = 0;
                    continue;
                }
                if (nest == 1)
                {
                    // bracket delimiters: count nesting, stop at the balancing close
                    if (c == delimleft)
                        nestcount++;
                    else if (c == delimright)
                    {
                        nestcount--;
                        if (nestcount == 0)
                            goto Ldone;
                    }
                }
                else if (c == delimright)
                    goto Ldone;
                if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
                {
                    // an identifier at the start of a line may be the heredoc terminator
                    Token tok;
                    auto psave = p;
                    p--;
                    scan(&tok); // read in possible heredoc identifier
                    //printf("endid = '%s'\n", tok.ident.toChars());
                    if (tok.value == TOK.identifier && tok.ident is hereid)
                    {
                        /* should check that rest of line is blank
                         */
                        goto Ldone;
                    }
                    // not the terminator: restore the cursor and treat c as content
                    p = psave;
                }
                stringbuffer.writeUTF8(c);
                startline = 0;
            }
        }
    Ldone:
        // the closing delimiter must be followed directly by the closing quote
        if (*p == '"')
            p++;
        else if (hereid)
            error("delimited string must end in `%s\"`", hereid.toChars());
        else if (isspace(delimright))
            error("delimited string must end in `\"`");
        else
            error("delimited string must end in `%c\"`", delimright);
        result.setString(stringbuffer);
        stringPostfix(result);
    }

    /**
    Lex a token string. Some examples of token strings are:
    ---
    q{ foo(xxx) }    // " foo(xxx) "
    q{foo$(LPAREN)}  // "foo$(LPAREN)"
    q{{foo}"}"}      // "{foo}"}""
    ---
    It is assumed that `p` points to the opening curly-brace.

    The contents are run through `scan` one token at a time, purely to find
    the balancing closing brace; the string value is the source text between
    the outermost braces, taken as is.
    Params:
        result = pointer to the token that accepts the result
    */
    private void tokenStringConstant(Token* result)
    {
        result.value = TOK.string_;

        const start = loc();
        const pstart = ++p;         // first character after the opening {
        inTokenStringConstant++;
        scope(exit) inTokenStringConstant--;

        uint depth = 1;             // the opening { has already been consumed
        for (;;)
        {
            Token tok;
            scan(&tok);
            const kind = tok.value;
            if (kind == TOK.leftCurly)
            {
                depth++;
            }
            else if (kind == TOK.rightCurly)
            {
                if (--depth == 0)
                {
                    // `p` is just past the closing }, which is not part of the string
                    result.setString(pstart, p - 1 - pstart);
                    stringPostfix(result);
                    return;
                }
            }
            else if (kind == TOK.endOfFile)
            {
                error("unterminated token string constant starting at %s", start.toChars());
                result.setString();
                return;
            }
        }
    }

    /**
    Scan a quoted string while building the processed string value by
    handling escape sequences. The result is returned in the given `t` token.
    This function assumes that `p` currently points to the opening quote
    of the string. The character found there (`"` or `'`) is also the one
    that terminates the literal; the other quote character is ordinary content.

    When lexing ImportC (`Ccompile`), a line ending inside the literal is
    reported as an unterminated string and no string postfix is read.
    Params:
        t = the token to set the resulting string to
    References:
        D https://dlang.org/spec/lex.html#double_quoted_strings
        ImportC C11 6.4.5
    */
    private void escapeStringConstant(Token* t)
    {
        t.value = TOK.string_;

        const start = loc();
        const tc = *p++;        // opening quote
        stringbuffer.setsize(0);
        while (1)
        {
            dchar c = *p++;
            switch (c)
            {
            case '\\':
                // `p` is now on the character that selects the escape
                switch (*p)
                {
                case '&':
                    // named entities exist only in D
                    if (Ccompile)
                        goto default;
                    goto case;

                case 'u':
                case 'U':
                    // these yield a code point, stored UTF-8 encoded
                    c = escapeSequence();
                    stringbuffer.writeUTF8(c);
                    continue;
                default:
                    // every other escape is stored as a single byte (see writeByte below)
                    c = escapeSequence();
                    break;
                }
                break;
            case '\n':
                endOfLine();
                if (Ccompile)
                    goto Lunterminated;
                break;
            case '\r':
                if (*p == '\n')
                    continue; // ignore
                c = '\n'; // treat EndOfLine as \n character
                endOfLine();
                if (Ccompile)
                    goto Lunterminated;
                break;
            case '\'':
            case '"':
                // only the quote that opened the literal closes it
                if (c != tc)
                    goto default;
                t.setString(stringbuffer);
                if (!Ccompile)
                    stringPostfix(t);
                return;
            case 0:
            case 0x1A:
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
                p--;
            Lunterminated:
                error("unterminated string constant starting at %s", start.toChars());
                t.setString();
                return;
            default:
                if (c & 0x80)
                {
                    // multi-byte sequence: back up to its first byte and decode it
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {
                        // Unicode line/paragraph separators count as line endings
                        c = '\n';
                        endOfLine();
                        if (Ccompile)
                            goto Lunterminated;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
            }
            stringbuffer.writeByte(c);
        }
    }

    /**************************************
     * Lex a D character literal. `p` is on the opening `'`.
     *
     * On success `t.unsvalue` holds the character value. On any error a
     * diagnostic is issued and `t.unsvalue` is set to `'?'`.
     * Params:
     *  t = token that receives the character value
     * Returns:
     *  `TOK.charLiteral`, `TOK.wcharLiteral` or `TOK.dcharLiteral`, chosen
     *  from the escape form used or from the decoded code point
     * Reference:
     *    https://dlang.org/spec/lex.html#characterliteral
     */
    private TOK charConstant(Token* t)
    {
        TOK tk = TOK.charLiteral;
        //printf("Lexer::charConstant\n");
        p++;                    // skip the opening quote
        dchar c = *p++;
        switch (c)
        {
        case '\\':
            // the form of the escape decides the literal type
            switch (*p)
            {
            case 'u':
                t.unsvalue = escapeSequence();
                tk = TOK.wcharLiteral;
                break;
            case 'U':
            case '&':
                t.unsvalue = escapeSequence();
                tk = TOK.dcharLiteral;
                break;
            default:
                t.unsvalue = escapeSequence();
                break;
            }
            break;
        case '\n':
        L1:
            // a line ending (also reached for LS/PS below): count the line, then report the error
            endOfLine();
            goto case;
        case '\r':
            goto case '\'';
        case 0:
        case 0x1A:
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            goto case;
        case '\'':
            // reached directly for `''`, and via the cases above for line ends and end of file
            error("unterminated character constant");
            t.unsvalue = '?';
            return tk;
        default:
            if (c & 0x80)
            {
                // multi-byte sequence: back up to its first byte and decode it
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;
                // below the surrogate range, or from 0xE000 up to 0xFFFD: wchar; anything else: dchar
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                    tk = TOK.wcharLiteral;
                else
                    tk = TOK.dcharLiteral;
            }
            t.unsvalue = c;
            break;
        }
        if (*p != '\'')
        {
            // Error recovery: skip ahead to a closing quote, or stop at a line end,
            // end of file, or one of `;` `)` `]` `}`
            while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
                    *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
            {
                if (*p & 0x80)
                {
                    const s = p;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {
                        // leave the separator unconsumed
                        p = s;
                        break;
                    }
                }
                p++;
            }

            if (*p == '\'')
            {
                error("character constant has multiple characters");
                p++;
            }
            else
                error("unterminated character constant");
            t.unsvalue = '?';
            return tk;
        }
        p++;                    // skip the closing quote
        return tk;
    }

    /***************************************
     * Lex C character constant.
     * Parser is on the opening quote.
     *
     * The constant is first lexed like a string literal; the resulting bytes
     * are then combined into a single integer value according to `prefix`:
     * $(UL
     *   $(LI no prefix: up to 4 chars, the first char ending up in the most
     *        significant position)
     *   $(LI `u`: one or two UTF-8 encoded characters, each of which must fit
     *        in 16 bits; the first one goes into the upper 16 bits when there
     *        are two)
     *   $(LI `U`: exactly one UTF-8 encoded character)
     * )
     * On return `t.value` is `TOK.charLiteral` when the constant is a single
     * byte, `TOK.int32Literal` otherwise, and `t.unsvalue` holds the value.
     * An empty constant is reported and turned into `TOK.semicolon`.
     * Params:
     *  t = token to fill in
     *  prefix = one of `u`, `U` or 0.
     * Reference:
     *  C11 6.4.4.4
     */
    private void clexerCharConstant(ref Token t, char prefix)
    {
        escapeStringConstant(&t);
        const(char)[] str = t.ustring[0 .. t.len];
        const n = str.length;
        const loc = t.loc;
        if (n == 0)
        {
            error(loc, "empty character constant");
            t.value = TOK.semicolon;
            return;
        }

        uint u;
        switch (prefix)
        {
            case 0:
                if (n == 1) // fast case
                {
                    u = str[0];
                }
                else if (n > 4)
                    error(loc, "max number of chars in character literal is 4, had %d",
                        cast(int)n);
                else
                {
                    /* Build the value with shifts instead of storing bytes through
                     * a pointer into `u`, so the result is the same whatever the
                     * byte order of the host the compiler runs on.
                     */
                    foreach (c; str)
                        u = (u << 8) | c;
                }
                break;

            case 'u':
                dchar d1;
                size_t idx;
                auto msg = utf_decodeChar(str, idx, d1);
                dchar d2 = 0;
                if (idx < n && !msg)
                    msg = utf_decodeChar(str, idx, d2);
                if (msg)
                    error(loc, "%s", msg);
                else if (idx < n)
                    // %d expects an int, so narrow the size_t explicitly
                    error(loc, "max number of chars in 16 bit character literal is 2, had %d",
                        cast(int)((n + 1) >> 1));
                else if (d1 > 0xFFFF)   // 0x1_0000 itself already needs 17 bits
                    error(loc, "%d does not fit in 16 bits", d1);
                else if (d2 > 0xFFFF)
                    error(loc, "%d does not fit in 16 bits", d2);
                u = d1;
                if (d2)
                    u = (d1 << 16) | d2;
                break;

            case 'U':
                dchar d;
                size_t idx;
                auto msg = utf_decodeChar(str, idx, d);
                if (msg)
                    error(loc, "%s", msg);
                else if (idx < n)
                    // %d expects an int, so narrow the size_t explicitly
                    error(loc, "max number of chars in 32 bit character literal is 1, had %d",
                        cast(int)((n + 3) >> 2));
                u = d;
                break;

            default:
                assert(0);
        }
        t.value = n == 1 ? TOK.charLiteral : TOK.int32Literal;
        t.unsvalue = u;
    }

    /***************************************
     * Read the optional postfix character of a string literal.
     * If `p` is on one of `c`, `w` or `d`, that character is stored in
     * `t.postfix` and consumed; otherwise `t.postfix` is set to 0 and `p`
     * is left where it is.
     * Params:
     *  t = string token whose postfix is to be set
     */
    private void stringPostfix(Token* t) pure @nogc
    {
        if (*p == 'c' || *p == 'w' || *p == 'd')
        {
            t.postfix = *p;
            p++;
        }
        else
        {
            t.postfix = 0;
        }
    }

    /**************************************
     * Read in a number.
     * If it's an integer, store it in tok.TKutok.Vlong.
     *      integers can be decimal, octal or hex
     *      Handle the suffixes U, UL, LU, L, etc.
     * If it's double, store it in tok.TKutok.Vdouble.
     * Returns:
     *      TKnum
     *      TKdouble,...
     */
    private TOK number(Token* t)
    {
        int base = 10;
        const start = p;
        uinteger_t n = 0; // unsigned >=64 bit integer type
        int d;
        bool err = false;
        bool overflow = false;
        bool anyBinaryDigitsNoSingleUS = false;
        bool anyHexDigitsNoSingleUS = false;
        char errorDigit = 0;
        dchar c = *p;
        if (c == '0')
        {
            ++p;
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                base = 8;
                break;

            case '8':
            case '9':
                errorDigit = cast(char) c;
                base = 8;
                break;
            case 'x':
            case 'X':
                ++p;
                base = 16;
                break;
            case 'b':
            case 'B':
                if (Ccompile)
                    error("binary constants not allowed");
                ++p;
                base = 2;
                break;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                {
                    if (Ccompile && (p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                        goto Lreal;  // if `0.f` or `0.L`
                    goto Ldone; // if ".identifier" or ".unicode"
                }
                goto Lreal; // '.' is part of current token
            case 'i':
            case 'f':
            case 'F':
                goto Lreal;
            case '_':
                if (Ccompile)
                    error("embedded `_` not allowed");
                ++p;
                base = 8;
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                break;
            default:
                break;
            }
        }
        while (1)
        {
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                ++p;
                d = c - '0';
                break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
                ++p;
                if (base != 16)
                {
                    if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
                        goto Lreal;
                }
                if (c >= 'a')
                    d = c + 10 - 'a';
                else
                    d = c + 10 - 'A';
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                goto Ldone;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                {
                    if (Ccompile && base == 10 &&
                        (p[1] == 'e' || p[1] == 'E' || p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                        goto Lreal;  // if `1.e6` or `1.f` or `1.L`
                    goto Ldone; // if ".identifier" or ".unicode"
                }
                if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 2)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // otherwise as part of a floating point literal
            case 'p':
            case 'P':
            case 'i':
            Lreal:
                p = start;
                return inreal(t);
            case '_':
                if (Ccompile)
                    goto default;
                ++p;
                continue;
            default:
                goto Ldone;
            }
            // got a digit here, set any necessary flags, check for errors
            anyHexDigitsNoSingleUS = true;
            anyBinaryDigitsNoSingleUS = true;
            if (!errorDigit && d >= base)
            {
                errorDigit = cast(char) c;
            }
            // Avoid expensive overflow check if we aren't at risk of overflow
            if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
                n = n * base + d;
            else
            {
                import core.checkedint : mulu, addu;

                n = mulu(n, base, overflow);
                n = addu(n, d, overflow);
            }
        }
    Ldone:
        if (errorDigit)
        {
            error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                                 base == 8 ? "octal".ptr :
                                                 "decimal".ptr, errorDigit);
            err = true;
        }
        if (overflow && !err)
        {
            error("integer overflow");
            err = true;
        }
        if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
            (base == 16 && !anyHexDigitsNoSingleUS))
            error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);

        t.unsvalue = n;

        if (Ccompile)
            return cnumber(base, n);

        enum FLAGS : int
        {
            none = 0,
            decimal = 1, // decimal
            unsigned = 2, // u or U suffix
            long_ = 4, // L suffix
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
        // Parse trailing 'u', 'U', 'l' or 'L' in any combination
        const psuffix = p;
        while (1)
        {
            FLAGS f;
            switch (*p)
            {
            case 'U':
            case 'u':
                f = FLAGS.unsigned;
                goto L1;
            case 'l':
                f = FLAGS.long_;
                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
                goto L1;
            case 'L':
                f = FLAGS.long_;
            L1:
                p++;
                if ((flags & f) && !err)
                {
                    error("unrecognized token");
                    err = true;
                }
                flags = cast(FLAGS)(flags | f);
                continue;
            default:
                break;
            }
            break;
        }
        if (base == 8 && n >= 8)
        {
            if (err)
                // can't translate invalid octal value, just show a generic message
                error("octal literals larger than 7 are no longer supported");
            else
                error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!\"%llo%.*s\"` instead",
                    n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
        }
        TOK result;
        switch (flags)
        {
        case FLAGS.none:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else if (n & 0xFFFFFFFF00000000L)
                result = TOK.int64Literal;
            else if (n & 0x80000000)
                result = TOK.uns32Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000L)
            {
                result = TOK.uns64Literal;
            }
            else if (n & 0xFFFFFFFF80000000L)
                result = TOK.int64Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.unsigned:
        case FLAGS.decimal | FLAGS.unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.uns32Literal;
            break;
        case FLAGS.decimal | FLAGS.long_:
            if (n & 0x8000000000000000L)
            {
                if (!err)
                {
                    error("signed integer overflow");
                    err = true;
                }
                result = TOK.uns64Literal;
            }
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.long_:
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.unsigned | FLAGS.long_:
        case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
            result = TOK.uns64Literal;
            break;
        default:
            debug
            {
                printf("%x\n", flags);
            }
            assert(0);
        }
        return result;
    }

    /**************************************
     * Lex C integer-suffix
     * Params:
     *  base = number base
     *  n = raw integer value
     * Returns:
     *  token value
     */
    private TOK cnumber(int base, uinteger_t n)
    {
        /* Suffix grammar handled here (C11 6.4.4.1):
         *   u or U    unsigned
         *   l or L    long
         *   ll or LL  long long (both letters must have the same case)
         */
        enum FLAGS : uint
        {
            octalhex = 1, // anything that is not base 10
            decimal  = 2, // base 10
            unsigned = 4, // u or U suffix
            long_    = 8, // l or L suffix
            llong    = 0x10 // ll or LL
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex;
        bool err;   // set once the duplicate-suffix error has been issued

        // Consume suffix letters until some other character shows up
        for (;;)
        {
            const cs = *p;
            FLAGS f;
            if (cs == 'u' || cs == 'U')
            {
                f = FLAGS.unsigned;
            }
            else if (cs == 'l' || cs == 'L')
            {
                f = FLAGS.long_;
                if (p[1] == cs)
                {
                    // doubled letter of identical case: `ll` or `LL`
                    f = FLAGS.long_ | FLAGS.llong;
                    ++p;
                }
            }
            else
                break;

            ++p;
            if ((flags & f) && !err)
            {
                error("duplicate integer suffixes");
                err = true;
            }
            flags = cast(FLAGS)(flags | f);
        }

        // Magnitude tests on the raw value, shared by the cases below
        const bool usesBit63  = (n & 0x8000000000000000L) != 0; // does not fit a signed 64 bit type
        const bool over32Bits = (n & 0xFFFFFFFF00000000L) != 0; // does not fit any 32 bit type
        const bool over31Bits = (n & 0xFFFFFFFF80000000L) != 0; // does not fit a signed 32 bit type
        const bool usesBit31  = (n & 0x80000000) != 0;          // bit 31 is set

        // When this holds, `l`/`L` suffixed literals may resolve to 32 bit tokens
        const bool narrowLong = (longsize == 4 || long_longsize == 4);

        /* Since D doesn't have a variable sized `long` or `unsigned long` type,
         * the result is always one of D's int, uint, long or ulong literals.
         */
        TOK result = TOK.int32Literal;     // default
        switch (flags)
        {
            case FLAGS.octalhex:
                // First that fits: int, uint, long, ulong
                result = usesBit63  ? TOK.uns64Literal
                       : over32Bits ? TOK.int64Literal
                       : usesBit31  ? TOK.uns32Literal
                       :              TOK.int32Literal;
                break;

            case FLAGS.decimal:
                // First that fits: int, long; ulong as the last resort
                result = usesBit63  ? TOK.uns64Literal
                       : over31Bits ? TOK.int64Literal
                       :              TOK.int32Literal;
                break;

            case FLAGS.octalhex | FLAGS.unsigned:
            case FLAGS.decimal | FLAGS.unsigned:
                // First that fits: uint, ulong
                result = over32Bits ? TOK.uns64Literal : TOK.uns32Literal;
                break;

            case FLAGS.decimal | FLAGS.long_:
                // First that fits: long, long long
                if (narrowLong)
                    result = over31Bits ? TOK.int64Literal : TOK.int32Literal;
                else
                    result = TOK.int64Literal;
                break;

            case FLAGS.octalhex | FLAGS.long_:
                // First that fits: long, unsigned long, long long, unsigned long long
                if (narrowLong)
                {
                    result = usesBit63  ? TOK.uns64Literal
                           : over32Bits ? TOK.int64Literal
                           : usesBit31  ? TOK.uns32Literal
                           :              TOK.int32Literal;
                }
                else
                    result = usesBit63 ? TOK.uns64Literal : TOK.int64Literal;
                break;

            case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_:
            case FLAGS.decimal  | FLAGS.unsigned | FLAGS.long_:
                // First that fits: unsigned long, unsigned long long
                if (narrowLong)
                    result = over32Bits ? TOK.uns64Literal : TOK.uns32Literal;
                else
                    result = TOK.uns64Literal;
                break;

            case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong:
                // First that fits: long long, unsigned long long
                result = usesBit63 ? TOK.uns64Literal : TOK.int64Literal;
                break;

            case FLAGS.decimal | FLAGS.long_ | FLAGS.llong:
                // long long, regardless of magnitude
                result = TOK.int64Literal;
                break;

            case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
            case FLAGS.decimal  | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
                result = TOK.uns64Literal;
                break;

            default:
                debug printf("%x\n",flags);
                assert(0);
        }
        return result;
    }

    /**************************************
     * Read in characters, converting them to real.
     * Bugs:
     *      Exponent overflow not detected.
     *      Too much requested precision is not detected.
     */
    private TOK inreal(Token* t)
    {
        //printf("Lexer::inreal()\n");
        debug
        {
            assert(*p == '.' || isdigit(*p));
        }
        bool isWellformedString = true;     // cleared when the literal is malformed
        stringbuffer.setsize(0);
        auto pstart = p;                    // first character of the literal
        bool hex = false;
        dchar c = *p++;
        // Leading '0x'
        if (c == '0')
        {
            c = *p++;
            if (c == 'x' || c == 'X')
            {
                hex = true;
                c = *p++;
            }
        }
        // Digits to left of '.'
        while (1)
        {
            if (c == '.')
            {
                c = *p++;
                break;
            }
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Digits to right of '.'
        while (1)
        {
            if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
            {
                c = *p++;
                continue;
            }
            break;
        }
        // Exponent: `e`/`E`, or `p`/`P` for hex floats
        if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
        {
            c = *p++;
            if (c == '-' || c == '+')
            {
                c = *p++;
            }
            bool anyexp = false;    // true once an exponent digit was seen
            while (1)
            {
                if (isdigit(c))
                {
                    anyexp = true;
                    c = *p++;
                    continue;
                }
                if (c == '_')
                {
                    if (Ccompile)
                        error("embedded `_` in numeric literals not allowed");
                    c = *p++;
                    continue;
                }
                if (!anyexp)
                {
                    error("missing exponent");
                    isWellformedString = false;
                }
                break;
            }
        }
        else if (hex)
        {
            error("exponent required for hex float");
            isWellformedString = false;
        }
        // `c` holds the first character that is not part of the number and
        // `p` is one past it; step back so `p` points at that character.
        --p;
        // Copy the literal, minus `_` separators, into the 0-terminated scratch buffer
        while (pstart < p)
        {
            if (*pstart != '_')
                stringbuffer.writeByte(*pstart);
            ++pstart;
        }
        stringbuffer.writeByte(0);
        auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
        TOK result;
        bool isOutOfRange = false;
        t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, isOutOfRange) : CTFloat.zero);
        // Precision suffix
        switch (*p)
        {
        case 'F':
        case 'f':
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
            result = TOK.float32Literal;
            p++;
            break;
        default:
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
            result = TOK.float64Literal;
            break;
        case 'l':
            if (!Ccompile)
                error("use 'L' suffix instead of 'l'");
            goto case 'L';
        case 'L':
            ++p;
            // a C `long double` of size 8 is treated like an unsuffixed literal
            if (Ccompile && long_doublesize == 8)
                goto default;
            result = TOK.float80Literal;
            break;
        }
        // Imaginary suffix (not recognized when compiling C)
        if ((*p == 'i' || *p == 'I') && !Ccompile)
        {
            if (*p == 'I')
                error("use 'i' suffix instead of 'I'");
            p++;
            switch (result)
            {
            case TOK.float32Literal:
                result = TOK.imaginary32Literal;
                break;
            case TOK.float64Literal:
                result = TOK.imaginary64Literal;
                break;
            case TOK.float80Literal:
                result = TOK.imaginary80Literal;
                break;
            default:
                break;
            }
        }
        const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
        if (isOutOfRange && !isLong && (!Ccompile || hex))
        {
            /* C11 6.4.4.2 doesn't actually care if it is not representable if it is not hex
             */
            /* `result` may already be an imaginary token at this point, so the
             * description must cover the imaginary kinds too; looking one up in
             * a table keyed only by the real kinds would be a range violation.
             * The 80 bit tokens never get here because of the `!isLong` test.
             */
            const(char)* suffix;
            const(char)* type;
            switch (result)
            {
            case TOK.float32Literal:
                suffix = "f";
                type = "`float`";
                break;
            case TOK.imaginary32Literal:
                suffix = "fi";
                type = "`ifloat`";
                break;
            case TOK.imaginary64Literal:
                suffix = "i";
                type = "`idouble`";
                break;
            default: // TOK.float64Literal
                suffix = "";
                type = "`double`";
                break;
            }
            error(scanloc, "number `%s%s` is not representable as a %s", sbufptr, suffix, type);
            const char* extra = result == TOK.float64Literal ? "`real` literals can be written using the `L` suffix. " : "";
            errorSupplemental(scanloc, "%shttps://dlang.org/spec/lex.html#floatliteral", extra);
        }
        debug
        {
            switch (result)
            {
            case TOK.float32Literal:
            case TOK.float64Literal:
            case TOK.float80Literal:
            case TOK.imaginary32Literal:
            case TOK.imaginary64Literal:
            case TOK.imaginary80Literal:
                break;
            default:
                assert(0);
            }
        }
        return result;
    }

    /**
     * Refresh `scanloc` so that it describes the current scan position `p`.
     * Returns:
     *  a copy of the updated `scanloc`
     */
    final Loc loc() pure @nogc
    {
        // 1-based column: distance of `p` from the start of the current line
        const column = (p - line) + 1;
        scanloc.charnum = cast(uint) column;
        version (LocOffset)
        {
            scanloc.fileOffset = cast(uint)(p - base);
        }
        return scanloc;
    }

    /**
     * Report an error at the location of the current token.
     * Params:
     *  format = printf-style format string
     *  ... = arguments matching `format`
     */
    final void error(const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .verror(token.loc, format, ap);
    }

    /**
     * Report an error at an explicitly given location.
     * Params:
     *  loc = location to attach to the message
     *  format = printf-style format string
     *  ... = arguments matching `format`
     */
    final void error(const ref Loc loc, const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .verror(loc, format, ap);
    }

    /**
     * Report a deprecation at the location of the current token.
     * Params:
     *  format = printf-style format string
     *  ... = arguments matching `format`
     */
    final void deprecation(const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .vdeprecation(token.loc, format, ap);
    }

    /***************************************
     * Parse special token sequence:
     * Returns:
     *  true if the special token sequence was handled
     * References:
     *  https://dlang.org/spec/lex.html#special-token-sequence
     */
    bool parseSpecialTokenSequence()
    {
        // Look at the token that follows the `#`
        Token n;
        scan(&n);

        if (n.value == TOK.if_)
        {
            error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
            return false;
        }

        // Anything else that is not an identifier is left to the caller
        if (n.value != TOK.identifier)
            return false;

        if (n.ident == Id.line)
        {
            // `#line ...` is the one directive handled here
            poundLine(n, false);
            return true;
        }

        const locx = loc();
        warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
        return false;
    }

    /*********************************************
     * Parse line/file preprocessor directive:
     *    #line linnum [filespec]
     * Allow __LINE__ for linnum, and __FILE__ for filespec.
     * Accept linemarker format:
     *    # linnum [filespec] {flags}
     * There can be zero or more flags, which are one of the digits 1..4, and
     * must be in ascending order. The flags are ignored.
     * Params:
     *  tok = token we're on, which is linnum of linemarker
     *  linemarker = true if line marker format and lexer is on linnum
     * References:
     *  linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html
     */
    final void poundLine(ref Token tok, bool linemarker)
    {
        // Values to install once the directive has been fully validated;
        // the line number defaults to the current one.
        auto linnum = this.scanloc.linnum;
        const(char)* filespec = null;
        bool flags;     // true once a linemarker flag (1..4) has been seen

        // For `#line`, `tok` is still the `line` identifier: advance to linnum
        if (!linemarker)
            scan(&tok);
        if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
        {
            // Accept the value only if it survives the round trip through `int`
            const lin = cast(int)(tok.unsvalue);
            if (lin != tok.unsvalue)
            {
                error(tok.loc, "line number `%lld` out of range", cast(ulong)tok.unsvalue);
                skipToNextLine();
                return;
            }
            else
                linnum = lin;
        }
        else if (tok.value == TOK.line)  // #line __LINE__
        {
            // keep the current line number
        }
        else
        {
            error(tok.loc, "positive integer argument expected following `#line`");
            if (tok.value != TOK.endOfLine)
                skipToNextLine();
            return;
        }
        // Process the remaining tokens of the directive, up to end of line/file
        while (1)
        {
            scan(&tok);
            switch (tok.value)
            {
            case TOK.endOfFile:
            case TOK.endOfLine:
                // Directive is complete: apply it, except inside a token string
                if (!inTokenStringConstant)
                {
                    this.scanloc.linnum = linnum;
                    if (filespec)
                        this.scanloc.filename = filespec;
                }
                return;
            case TOK.file:
                // `__FILE__`: only one filespec allowed, and it must precede any flag
                if (filespec || flags)
                    goto Lerr;
                filespec = mem.xstrdup(scanloc.filename);
                continue;
            case TOK.string_:
                if (filespec || flags)
                    goto Lerr;
                // must be a plain double-quoted string without a postfix character
                if (tok.ptr[0] != '"' || tok.postfix != 0)
                    goto Lerr;
                filespec = tok.ustring;
                continue;
            case TOK.int32Literal:
                // Integers are only valid as linemarker flags, after the filespec
                if (!filespec)
                    goto Lerr;
                // Any value in 1..4 is accepted; ordering of flags is not checked here
                if (linemarker && tok.unsvalue >= 1 && tok.unsvalue <= 4)
                {
                    flags = true;   // linemarker flags seen
                    continue;
                }
                goto Lerr;
            default:
                goto Lerr;
            }
        }
    Lerr:
        // Pick the message from how far parsing got; when compiling C, a stray
        // token after a `#line` filespec is skipped without a message.
        if (filespec is null)
            error(tok.loc, "invalid filename for `#line` directive");
        else if (linemarker)
            error(tok.loc, "invalid flag for line marker directive");
        else if (!Ccompile)
            error(tok.loc, "found `%s` when expecting new line following `#line` directive", tok.toChars());
        // Discard the rest of the line unless we are already at its end
        if (tok.value != TOK.endOfLine)
            skipToNextLine();
    }

    /***************************************
     * Scan forward to start of next line.
     * Params:
     *    defines = send characters to `defines`
     */
    final void skipToNextLine(OutBuffer* defines = null)
    {
        for (;;)
        {
            const ch = *p;

            // End of input: leave `p` where it is and do no line bookkeeping
            if (ch == 0 || ch == 0x1A)
                return;

            if (ch == '\n')
            {
                ++p;
                break;
            }

            if (ch == '\r')
            {
                // treat `\r\n` as a single line ending
                ++p;
                if (*p == '\n')
                    ++p;
                break;
            }

            if (defines)
            {
                // Unicode line endings are not looked for when collecting text for C
                defines.writeByte(ch);
            }
            else if (ch & 0x80)
            {
                // decodeUTF() leaves `p` on the last byte of the sequence
                const u = decodeUTF();
                if (u == PS || u == LS)
                {
                    ++p;
                    break;
                }
            }
            ++p;
        }

        // `p` is now at the start of the next line
        endOfLine();
        tokenizeNewlines = false;
    }

    /********************************************
     * Decode UTF character.
     * Issue error messages for invalid sequences.
     * Return decoded character, advance p to last character in UTF sequence.
     */
    private uint decodeUTF()
    {
        const start = p;
        assert(*start & 0x80);

        // Hand the decoder at most 4 bytes, stopping early at a 0 byte
        size_t avail = 1;
        while (avail < 4 && start[avail])
            ++avail;

        dchar decoded;
        size_t consumed = 0;
        const msg = utf_decodeChar(start[0 .. avail], consumed, decoded);

        // Leave `p` on the last byte that was consumed
        p += consumed - 1;

        if (msg)
            error("%.*s", cast(int)msg.length, msg.ptr);
        return decoded;
    }

    /***************************************************
     * Parse doc comment embedded between t.ptr and p.
     * Remove trailing blanks and tabs from lines.
     * Replace all newlines with \n.
     * Remove leading comment character from each line.
     * Decide if it's a lineComment or a blockComment.
     * Append to previous one for this token.
     *
     * If newParagraph is true, an extra newline will be
     * added between adjoining doc comments.
     */
    private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
    {
        /* ct tells us which kind of comment it is: '/', '*', or '+'
         */
        const ct = t.ptr[2];
        /* Start of comment text skips over / * *, / + +, or / / /
         */
        const(char)* q = t.ptr + 3; // start of comment text
        const(char)* qend = p;
        // Block comments end with a two character terminator; leave it out
        if (ct == '*' || ct == '+')
            qend -= 2;
        /* Scan over initial row of ****'s or ++++'s or ////'s
         */
        for (; q < qend; q++)
        {
            if (*q != ct)
                break;
        }
        /* Remove leading spaces until start of the comment
         */
        // linestart is nonzero while only blanks have been seen on the current line
        int linestart = 0;
        if (ct == '/')
        {
            while (q < qend && (*q == ' ' || *q == '\t'))
                ++q;
        }
        else if (q < qend)
        {
            // For block comments, drop a line ending that directly follows the opener
            if (*q == '\r')
            {
                ++q;
                if (q < qend && *q == '\n')
                    ++q;
                linestart = 1;
            }
            else if (*q == '\n')
            {
                ++q;
                linestart = 1;
            }
        }
        /* Remove trailing row of ****'s or ++++'s
         */
        if (ct != '/')
        {
            for (; q < qend; qend--)
            {
                if (qend[-1] != ct)
                    break;
            }
        }
        /* Comment is now [q .. qend].
         * Canonicalize it into buf[].
         */
        OutBuffer buf;

        // Drop blanks and tabs from the end of what has been written to buf so far
        void trimTrailingWhitespace()
        {
            const s = buf[];
            auto len = s.length;
            while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
                --len;
            buf.setsize(len);
        }

        for (; q < qend; q++)
        {
            char c = *q;
            switch (c)
            {
            case '*':
            case '+':
                // First comment character on a line: drop it instead of copying it
                if (linestart && c == ct)
                {
                    linestart = 0;
                    /* Trim preceding whitespace up to preceding \n
                     */
                    trimTrailingWhitespace();
                    continue;
                }
                break;
            case ' ':
            case '\t':
                // blanks are copied and do not end the "start of line" state
                break;
            case '\r':
                if (q[1] == '\n')
                    continue; // skip the \r
                // a lone \r counts as a newline
                goto Lnewline;
            default:
                // 226 128 168/169 are the UTF-8 bytes of U+2028 / U+2029
                if (c == 226)
                {
                    // If LS or PS
                    if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
                    {
                        q += 2;
                        goto Lnewline;
                    }
                }
                linestart = 0;
                break;
            Lnewline:
                c = '\n'; // replace all newlines with \n
                goto case;
            case '\n':
                linestart = 1;
                /* Trim trailing whitespace
                 */
                trimTrailingWhitespace();
                break;
            }
            buf.writeByte(c);
        }
        /* Trim trailing whitespace (if the last line does not have newline)
         */
        trimTrailingWhitespace();

        // Always end with a newline
        const s = buf[];
        if (s.length == 0 || s[$ - 1] != '\n')
            buf.writeByte('\n');

        // It's a line comment if the start of the doc comment comes
        // after other non-whitespace on the same line.
        auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
        // Combine with previous doc comment, if any
        if (*dc)
            *dc = combineComments(*dc, buf[], newParagraph).toDString();
        else
            *dc = buf.extractSlice(true);
    }

    /********************************************
     * Combine two document comments into one,
     * separated by an extra newline if newParagraph is true.
     */
    static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
    {
        //debug printf("Lexer::combineComments('%*.s', '%*.s', '%i')\n", cast(int) c1.length, c1.ptr, cast(int) c2.length, c2.ptr, newParagraph);

        // With one side missing, the other one is handed back as is
        if (!c1)
            return c2.ptr;
        if (!c2)
            return c1.ptr;

        // c1 gets a terminating '\n' if it is non-empty and lacks one
        const bool needsTerminator = c1.length != 0 && c1[$ - 1] != '\n';
        const size_t gap = (needsTerminator ? 1 : 0) + (newParagraph ? 1 : 0);
        const size_t total = c1.length + gap + c2.length;

        // one extra byte for the terminating 0
        char* result = cast(char*)mem.xmalloc_noscan(total + 1);

        size_t pos = c1.length;     // write cursor
        result[0 .. pos] = c1[];
        if (needsTerminator)
            result[pos++] = '\n';
        if (newParagraph)
            result[pos++] = '\n';   // the paragraph separator
        result[pos .. pos + c2.length] = c2[];
        result[total] = 0;
        return result;
    }

    /**************************
     * `p` should be at start of next line
     */
    private void endOfLine() pure @nogc @safe
    {
        // remember where the new line starts, then count it
        line = p;
        ++scanloc.linnum;
    }
}


/******************************* Private *****************************************/

private:

/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // The buffers below hold meaningful text only after `initialize` has
    // run; callers are expected to call it before reading them.
    // Each buffer is sized for its text plus the terminating 0.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /**
     * Fill `date`, `time` and `timestamp`; does nothing after the first call.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the C library clock.
     * Params:
     *  loc = location used if `SOURCE_DATE_EPOCH` cannot be parsed
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;
        initdone = true;

        time_t now;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        auto epoch = getenv("SOURCE_DATE_EPOCH");
        if (!epoch)
            .time(&now);
        else if (!now.parseDigits(epoch.toDString()))
            error(loc, "value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epoch);

        // ctime() text has the fixed layout "Www Mmm dd hh:mm:ss yyyy\n"
        const stamp = ctime(&now);
        assert(stamp);
        sprintf(date.ptr, "%.6s %.4s", stamp + 4, stamp + 20);  // "Mmm dd yyyy"
        sprintf(time.ptr, "%.8s", stamp + 11);                  // "hh:mm:ss"
        sprintf(timestamp.ptr, "%.24s", stamp);                 // everything but the '\n'
    }
}

private enum LS = 0x2028;       // UTF line separator
private enum PS = 0x2029;       // UTF paragraph separator

/********************************************
 * Do our own char maps
 */
// Per-character table of CM* flag bits, indexed by character code.
// Built once at compile time by evaluating the lambda below.
private static immutable cmtable = ()
{
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        // Basic character classes
        if ('0' <= c && c <= '7')
            table[c] |= CMoctal;
        if (c_isxdigit(c))
            table[c] |= CMhex;
        if (c_isalnum(c) || c == '_')
            table[c] |= CMidchar;

        // Flags for the character that comes second in a numeric literal
        // NOTE(review): the names suggest CMzerosecond applies after a leading
        // `0` and CMdigitsecond after any other leading digit - confirm at the use sites
        switch (c)
        {
            case 'x': case 'X':
            case 'b': case 'B':
                table[c] |= CMzerosecond;
                break;

            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                table[c] |= CMzerosecond | CMdigitsecond;
                break;

            default:
                break;
        }

        // CMsinglechar: every 7 bit character except backslash, newline,
        // carriage return, 0, 0x1A and the single quote
        switch (c)
        {
            case '\\':
            case '\n':
            case '\r':
            case 0:
            case 0x1A:
            case '\'':
                break;
            default:
                if (!(c & 0x80))
                    table[c] |= CMsinglechar;
                break;
        }
    }
    return table;
}();

private
{
    // Bit flags stored for each character in `cmtable`
    enum CMoctal       = 1 << 0; // '0' .. '7'
    enum CMhex         = 1 << 1; // hexadecimal digit
    enum CMidchar      = 1 << 2; // letter, digit or '_'
    enum CMzerosecond  = 1 << 3; // see the first switch in the `cmtable` builder
    enum CMdigitsecond = 1 << 4; // see the first switch in the `cmtable` builder
    enum CMsinglechar  = 1 << 5; // 7 bit char other than \\ \n \r 0 0x1A and '
}

/// Returns: `true` if the `CMoctal` bit is set for `c` in `cmtable` ('0' .. '7').
private bool isoctal(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMoctal) != 0;
}

/// Returns: `true` if the `CMhex` bit is set for `c` in `cmtable` (a hexadecimal digit).
private bool ishex(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMhex) != 0;
}

/// Returns: `true` if the `CMidchar` bit is set for `c` in `cmtable`
/// (an ASCII letter, a decimal digit, or '_').
private bool isidchar(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMidchar) != 0;
}

/// Returns: `true` if the `CMzerosecond` bit is set for `c` in `cmtable`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMzerosecond) != 0;
}

/// Returns: `true` if the `CMdigitsecond` bit is set for `c` in `cmtable`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMdigitsecond) != 0;
}

/// Returns: `true` if the `CMsinglechar` bit is set for `c` in `cmtable`.
private bool issinglechar(const char c) pure @nogc @safe
{
    const ubyte flags = cmtable[c];
    return (flags & CMsinglechar) != 0;
}

/**
 * Locale-independent test for an ASCII hexadecimal digit.
 *
 * Params:
 *  c = character value to classify
 * Returns: `true` for '0' .. '9', 'a' .. 'f' and 'A' .. 'F'; `false` for anything else
 */
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}

/**
 * Locale-independent test for an ASCII letter or decimal digit.
 *
 * Params:
 *  c = character value to classify
 * Returns: `true` for '0' .. '9', 'a' .. 'z' and 'A' .. 'Z'; `false` for anything else
 */
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}

/******************************* Unittest *****************************************/

// Well-formed escape sequences: each must decode to the expected value,
// consume the whole input, and emit no diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic raised while decoding a valid sequence is a failure.
    nothrow bool failOnDiagnostic(const ref Loc loc, Color headerColor, const(char)* header,
                                  const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &failOnDiagnostic;

    /* Runs `escapeSequence` over `sequence` and checks the decoded value as
     * well as that the scan pointer ends exactly at the end of the input.
     */
    static void check(T)(string sequence, T expected, bool Ccompile = false)
    {
        const(char)* cursor = sequence.ptr;
        const(char)* end = sequence.ptr + sequence.length;
        Lexer lexer = new Lexer();
        const decoded = lexer.escapeSequence(Loc.initial, cursor, Ccompile);
        assert(expected == decoded);
        assert(cursor == end);
    }

    // Simple single-character escapes
    check(`'`, '\'');
    check(`"`, '"');
    check(`?`, '?');
    check(`\`, '\\');
    check(`0`, '\0');
    check(`a`, '\a');
    check(`b`, '\b');
    check(`f`, '\f');
    check(`n`, '\n');
    check(`r`, '\r');
    check(`t`, '\t');
    check(`v`, '\v');

    // Two-digit hex escapes
    check(`x00`, 0x00);
    check(`xff`, 0xff);
    check(`xFF`, 0xff);
    check(`xa7`, 0xa7);
    check(`x3c`, 0x3c);
    check(`xe2`, 0xe2);

    // Octal escapes of one to three digits
    check(`1`, '\1');
    check(`42`, '\42');
    check(`357`, '\357');

    // Four-digit Unicode escapes
    check(`u1234`, '\u1234');
    check(`uf0e4`, '\uf0e4');

    // Eight-digit Unicode escapes
    check(`U0001f603`, '\U0001f603');

    // Named character entities
    check(`&quot;`, '"');
    check(`&lt;`, '<');
    check(`&gt;`, '>');

    diagnosticHandler = null;
}

// Malformed escape sequences: each must raise exactly the expected error
// message, return the expected recovery value, and consume the expected
// number of input characters.
unittest
{
    import dmd.console;
    string expected;    // message the next diagnostic must match
    bool gotError;      // set by the handler once a diagnostic arrives

    // Formats the diagnostic and compares it against `expected`.
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Bound the write to the buffer: an unexpectedly long message must
        // fail the test instead of overrunning the stack buffer.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        // vsnprintf returns the untruncated length (or a negative value on
        // error); only slice when the whole message actually fit.
        assert(written >= 0 && written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    /* Decodes `sequence` and checks that an error was reported with the text
     * `expectedError`, that `expectedReturnValue` came back, and that exactly
     * `expectedScanLength` characters were consumed. `global.errors` is
     * restored afterwards so the expected errors do not accumulate.
     */
    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false)
    {
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        Lexer lexer = new Lexer();
        auto actualReturnValue = lexer.escapeSequence(Loc.initial, p, Ccompile);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);
    // With Ccompile set, a named entity is reported as an undefined escape.
    test("&quot;", `undefined escape sequence \&`, '&', 1, true);

    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}

// Smoke test: lexing "int" yields TOK.int32, after which the lexer keeps
// returning TOK.endOfFile on every further request.
unittest
{
    string source = "int"; // relies on the literal's implicit null terminator
    scope Lexer lexer = new Lexer(null, source.ptr, 0, source.length, 0, 0);

    assert(lexer.nextToken() == TOK.int32);

    // Reading past the end must be stable, not just work once.
    foreach (attempt; 0 .. 3)
        assert(lexer.nextToken() == TOK.endOfFile);
}

// Malformed input must still terminate: every test case has to reach
// TOK.endOfFile within a bounded number of tokens and stay there.
unittest
{
    // Suppress lexer error output while the bad inputs are scanned.
    const uint gagged = global.startGagging();
    scope(exit) global.endGagging(gagged);

    static immutable char[][] inputs =
    [   // Each input must end with 0 or 0x1A.
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (input; inputs)
    {
        // The trailing terminator is excluded from the length handed to the lexer.
        scope Lexer lexer = new Lexer(null, input.ptr, 0, input.length - 1, 0, 0);

        // Allow at most one token request per input byte before giving up.
        TOK tok = lexer.nextToken();
        for (size_t requests = 1; tok != TOK.endOfFile && requests < input.length; ++requests)
            tok = lexer.nextToken();
        assert(tok == TOK.endOfFile);

        // End of file must be sticky.
        tok = lexer.nextToken();
        assert(tok == TOK.endOfFile);
    }
}