]> gcc.gnu.org Git - gcc.git/blame - gcc/ada/sinput.ads
[multiple changes]
[gcc.git] / gcc / ada / sinput.ads
CommitLineData
996ae0b0
RK
1------------------------------------------------------------------------------
2-- --
3-- GNAT COMPILER COMPONENTS --
4-- --
5-- S I N P U T --
6-- --
7-- S p e c --
8-- --
cd38efa5 9-- Copyright (C) 1992-2013, Free Software Foundation, Inc. --
996ae0b0
RK
10-- --
11-- GNAT is free software; you can redistribute it and/or modify it under --
12-- terms of the GNU General Public License as published by the Free Soft- --
748086b7 13-- ware Foundation; either version 3, or (at your option) any later ver- --
996ae0b0
RK
14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
748086b7
JJ
16-- or FITNESS FOR A PARTICULAR PURPOSE. --
17-- --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception, --
20-- version 3.1, as published by the Free Software Foundation. --
21-- --
22-- You should have received a copy of the GNU General Public License and --
23-- a copy of the GCC Runtime Library Exception along with this program; --
24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25-- <http://www.gnu.org/licenses/>. --
996ae0b0
RK
26-- --
27-- GNAT was originally developed by the GNAT team at New York University. --
71ff80dc 28-- Extensive contributions were provided by Ada Core Technologies Inc. --
996ae0b0
RK
29-- --
30------------------------------------------------------------------------------
31
32-- This package contains the input routines used for reading the
33-- input source file. The actual I/O routines are in OS_Interface,
34-- with this module containing only the system independent processing.
35
36-- General Note: throughout the compiler, we use the term line or source
37-- line to refer to a physical line in the source, terminated by the end of
82c80734
RD
38-- physical line sequence.
39
40-- There are two distinct concepts of line terminator in GNAT
41
42-- A logical line terminator is what corresponds to the "end of a line" as
43-- described in RM 2.2 (13). Any of the characters FF, LF, CR or VT or any
44-- wide character that is a Line or Paragraph Separator acts as an end of
45-- logical line in this sense, and it is essentially irrelevant whether one
8e0aa19b 46-- or more appears in sequence (since if a sequence of such characters is
82c80734
RD
47-- regarded as separate ends of line, then the intervening logical lines
48-- are null in any case).
49
50-- A physical line terminator is a sequence of format effectors that is
51-- treated as ending a physical line. Physical lines have no Ada semantic
52-- significance, but they are significant for error reporting purposes,
53-- since errors are identified by line and column location.
54
c27f2f15
RD
55-- In GNAT, a physical line is ended by any of the sequences LF, CR/LF, or
56-- CR. LF is used in typical Unix systems, CR/LF in DOS systems, and CR
57-- alone in System 7. In addition, we recognize any of these sequences in
58-- any of the operating systems, for better behavior in treating foreign
59-- files (e.g. a Unix file with LF terminators transferred to a DOS system).
60-- Finally, wide character codes in categories Separator, Line and Separator,
61-- Paragraph are recognized, but only as logical line terminators.
996ae0b0
RK
62
63with Alloc;
64with Casing; use Casing;
1c28fe3a 65with Namet; use Namet;
996ae0b0
RK
66with Table;
67with Types; use Types;
68
69package Sinput is
70
fbf5a39b
AC
71 type Type_Of_File is (
72 -- Indicates type of file being read
73
74 Src,
75 -- Normal Ada source file
76
77 Config,
78 -- Configuration pragma file
79
80 Def,
81 -- Preprocessing definition file
82
83 Preproc);
84 -- Source file with preprocessing commands to be preprocessed
85
cf427f02
AC
86 type Instance_Id is new Nat;
87 No_Instance_Id : constant Instance_Id;
88
996ae0b0
RK
89 ----------------------------
90 -- Source License Control --
91 ----------------------------
92
93 -- The following type indicates the license state of a source if it
94 -- is known.
95
96 type License_Type is
97 (Unknown,
98 -- Licensing status of this source unit is unknown
99
100 Restricted,
101 -- This is a non-GPL'ed unit that is restricted from depending
102 -- on GPL'ed units (e.g. proprietary code is in this category)
103
104 GPL,
105 -- This file is licensed under the unmodified GPL. It is not allowed
106 -- to depend on Non_GPL units, and Non_GPL units may not depend on
107 -- this source unit.
108
109 Modified_GPL,
110 -- This file is licensed under the GNAT modified GPL (see header of
111 -- this file for wording of the modification). It may depend on other
112 -- Modified_GPL units or on unrestricted units.
113
114 Unrestricted);
115 -- The license on this file is permitted to depend on any other
116 -- units, or have other units depend on it, without violating the
117 -- license of this unit. Examples are public domain units, and
118 -- units defined in the RM.
119
120 -- The above license status is checked when the appropriate check is
121 -- activated and one source depends on another, and the licensing state
122 -- of both files is known:
123
124 -- The prohibited combinations are:
125
126 -- Restricted file may not depend on GPL file
127
128 -- GPL file may not depend on Restricted file
129
130 -- Modified_GPL file may not depend on Restricted file
131 -- Modified_GPL file may not depend on GPL file
132
133 -- The reason for the last restriction here is that a client depending
134 -- on a modified GPL file must be sure that the license condition is
135 -- correct considered transitively.
136
137 -- The licensing status is determined either by the presence of a
138 -- specific pragma License, or by scanning the header for a predefined
139 -- file, or any file if compiling in -gnatg mode.
140
141 -----------------------
142 -- Source File Table --
143 -----------------------
144
145 -- The source file table has an entry for each source file read in for
146 -- this run of the compiler. This table is (default) initialized when
147 -- the compiler is loaded, and simply accumulates entries as compilation
fbf5a39b
AC
148 -- proceeds and various routines in Sinput and its child packages are
149 -- called to load required source files.
996ae0b0
RK
150
151 -- Virtual entries are also created for generic templates when they are
152 -- instantiated, as described in a separate section later on.
153
154 -- In the case where there are multiple main units (e.g. in the case of
155 -- the cross-reference tool), this table is not reset between these units,
156 -- so that a given source file is only read once if it is used by two
157 -- separate main units.
158
159 -- The entries in the table are accessed using a Source_File_Index that
160 -- ranges from 1 to Last_Source_File. Each entry has the following fields
161
fbf5a39b
AC
162 -- Note: fields marked read-only are set by Sinput or one of its child
163 -- packages when a source file table entry is created, and cannot be
3354f96d 164 -- subsequently modified, or alternatively are set only by very special
fbf5a39b 165 -- circumstances, documented in the comments.
996ae0b0 166
fbf5a39b
AC
167 -- File_Name : File_Name_Type (read-only)
168 -- Name of the source file (simple name with no directory information)
996ae0b0 169
fbf5a39b 170 -- Full_File_Name : File_Name_Type (read-only)
996ae0b0 171 -- Full file name (full name with directory info), used for generation
fbf5a39b
AC
172 -- of error messages, etc.
173
174 -- File_Type : Type_Of_File (read-only)
175 -- Indicates type of file (source file, configuration pragmas file,
176 -- preprocessor definition file, preprocessor input file).
996ae0b0 177
fbf5a39b 178 -- Reference_Name : File_Name_Type (read-only)
996ae0b0
RK
179 -- Name to be used for source file references in error messages where
180 -- only the simple name of the file is required. Identical to File_Name
181 -- unless pragma Source_Reference is used to change it. Only processing
182 -- for the Source_Reference pragma circuit may set this field.
183
fbf5a39b 184 -- Full_Ref_Name : File_Name_Type (read-only)
996ae0b0
RK
185 -- Name to be used for source file references in error messages where
186 -- the full name of the file is required. Identical to Full_File_Name
187 -- unless pragma Source_Reference is used to change it. Only processing
188 -- for the Source_Reference pragma may set this field.
189
fbf5a39b 190 -- Debug_Source_Name : File_Name_Type (read-only)
996ae0b0
RK
191 -- Name to be used for source file references in debugging information
192 -- where only the simple name of the file is required. Identical to
fbf5a39b
AC
193 -- Reference_Name unless the -gnatD (debug source file) switch is used.
194 -- Only processing in Sprint that generates this file is permitted to
195 -- set this field.
196
197 -- Full_Debug_Name : File_Name_Type (read-only)
198 -- Name to be used for source file references in debugging information
199 -- where the full name of the file is required. This is identical to
996ae0b0
RK
200 -- Full_Ref_Name unless the -gnatD (debug source file) switch is used.
201 -- Only processing in Sprint that generates this file is permitted to
202 -- set this field.
203
cf427f02
AC
204 -- Instance : Instance_Id (read-only)
205 -- For entries corresponding to a generic instantiation, unique
206 -- identifier denoting the full chain of nested instantiations. Set to
207 -- No_Instance_Id for the case of a normal, non-instantiation entry.
208 -- See below for details on the handling of generic instantiations.
209
996ae0b0
RK
210 -- License : License_Type;
211 -- License status of source file
212
213 -- Num_SRef_Pragmas : Nat;
214 -- Number of source reference pragmas present in source file
215
216 -- First_Mapped_Line : Logical_Line_Number;
217 -- This field stores logical line number of the first line in the
218 -- file that is not a Source_Reference pragma. If no source reference
219 -- pragmas are used, then the value is set to No_Line_Number.
220
fbf5a39b 221 -- Source_Text : Source_Buffer_Ptr (read-only)
996ae0b0
RK
222 -- Text of source file. Note that every source file has a distinct set
223 -- of non-overlapping logical bounds, so it is possible to determine
224 -- which file is referenced from a given subscript (Source_Ptr) value.
996ae0b0 225
fbf5a39b 226 -- Source_First : Source_Ptr; (read-only)
996ae0b0
RK
227 -- Subscript of first character in Source_Text. Note that this cannot
228 -- be obtained as Source_Text'First, because we use virtual origin
fbf5a39b 229 -- addressing.
996ae0b0 230
fbf5a39b 231 -- Source_Last : Source_Ptr; (read-only)
996ae0b0
RK
232 -- Subscript of last character in Source_Text. Note that this cannot
233 -- be obtained as Source_Text'Last, because we use virtual origin
fbf5a39b 234 -- addressing (Source_Text'Last is always Source_Ptr'Last).
996ae0b0 235
fbf5a39b
AC
236 -- Time_Stamp : Time_Stamp_Type; (read-only)
237 -- Time stamp of the source file
996ae0b0
RK
238
239 -- Source_Checksum : Word;
240 -- Computed checksum for contents of source file. See separate section
241 -- later on in this spec for a description of the checksum algorithm.
242
243 -- Last_Source_Line : Physical_Line_Number;
3354f96d 244 -- Physical line number of last source line. While a file is being
996ae0b0
RK
245 -- read, this refers to the last line scanned. Once a file has been
246 -- completely scanned, it is the number of the last line in the file,
247 -- and hence also gives the number of source lines in the file.
248
249 -- Keyword_Casing : Casing_Type;
250 -- Casing style used in file for keyword casing. This is initialized
251 -- to Unknown, and then set from the first occurrence of a keyword.
252 -- This value is used only for formatting of error messages.
253
254 -- Identifier_Casing : Casing_Type;
255 -- Casing style used in file for identifier casing. This is initialized
256 -- to Unknown, and then set from an identifier in the program as soon as
257 -- one is found whose casing is sufficiently clear to make a decision.
258 -- This value is used for formatting of error messages, and also is used
259 -- in the detection of keywords misused as identifiers.
260
cf427f02
AC
261 -- Inlined_Call : Source_Ptr;
262 -- Source file location of the subprogram call if this source file entry
263 -- represents an inlined body. Set to No_Location otherwise.
996ae0b0
RK
264 -- This field is read-only for clients.
265
fbf5a39b
AC
266 -- Inlined_Body : Boolean;
267 -- This can only be set True if Instantiation has a value other than
268 -- No_Location. If true it indicates that the instantiation is actually
269 -- an instance of an inlined body.
cf427f02 270 -- ??? Redundant, always equal to (Inlined_Call /= No_Location)
fbf5a39b
AC
271
272 -- Template : Source_File_Index; (read-only)
996ae0b0
RK
273 -- Source file index of the source file containing the template if this
274 -- is a generic instantiation. Set to No_Source_File for the normal case
fbf5a39b 275 -- of a non-instantiation entry. See Sinput-L for details.
996ae0b0 276
68e2ea27
TQ
277 -- Unit : Unit_Number_Type;
278 -- Identifies the unit contained in this source file. Set by
279 -- Initialize_Scanner, must not be subsequently altered.
280
996ae0b0
RK
281 -- The source file table is accessed by clients using the following
282 -- subprogram interface:
283
284 subtype SFI is Source_File_Index;
285
fbf5a39b
AC
286 System_Source_File_Index : SFI;
287 -- The file system.ads is always read by the compiler to determine the
288 -- settings of the target parameters in the private part of System. This
289 -- variable records the source file index of system.ads. Typically this
290 -- will be 1 since system.ads is read first.
291
996ae0b0
RK
292 function Debug_Source_Name (S : SFI) return File_Name_Type;
293 function File_Name (S : SFI) return File_Name_Type;
fbf5a39b 294 function File_Type (S : SFI) return Type_Of_File;
996ae0b0 295 function First_Mapped_Line (S : SFI) return Logical_Line_Number;
fbf5a39b 296 function Full_Debug_Name (S : SFI) return File_Name_Type;
996ae0b0
RK
297 function Full_File_Name (S : SFI) return File_Name_Type;
298 function Full_Ref_Name (S : SFI) return File_Name_Type;
299 function Identifier_Casing (S : SFI) return Casing_Type;
fbf5a39b 300 function Inlined_Body (S : SFI) return Boolean;
cf427f02
AC
301 function Inlined_Call (S : SFI) return Source_Ptr;
302 function Instance (S : SFI) return Instance_Id;
996ae0b0
RK
303 function Keyword_Casing (S : SFI) return Casing_Type;
304 function Last_Source_Line (S : SFI) return Physical_Line_Number;
305 function License (S : SFI) return License_Type;
306 function Num_SRef_Pragmas (S : SFI) return Nat;
307 function Reference_Name (S : SFI) return File_Name_Type;
308 function Source_Checksum (S : SFI) return Word;
309 function Source_First (S : SFI) return Source_Ptr;
310 function Source_Last (S : SFI) return Source_Ptr;
311 function Source_Text (S : SFI) return Source_Buffer_Ptr;
312 function Template (S : SFI) return Source_File_Index;
68e2ea27 313 function Unit (S : SFI) return Unit_Number_Type;
996ae0b0
RK
314 function Time_Stamp (S : SFI) return Time_Stamp_Type;
315
316 procedure Set_Keyword_Casing (S : SFI; C : Casing_Type);
317 procedure Set_Identifier_Casing (S : SFI; C : Casing_Type);
318 procedure Set_License (S : SFI; L : License_Type);
68e2ea27 319 procedure Set_Unit (S : SFI; U : Unit_Number_Type);
996ae0b0
RK
320
321 function Last_Source_File return Source_File_Index;
322 -- Index of last source file table entry
323
324 function Num_Source_Files return Nat;
325 -- Number of source file table entries
326
327 procedure Initialize;
328 -- Initialize internal tables
329
330 procedure Lock;
331 -- Lock internal tables
332
1c28fe3a
RD
333 procedure Unlock;
334 -- Unlock internal tables
335
82c80734 336 Main_Source_File : Source_File_Index := No_Source_File;
996ae0b0
RK
337 -- This is set to the source file index of the main unit
338
fbf5a39b
AC
339 -----------------------------
340 -- Source_File_Index_Table --
341 -----------------------------
342
343 -- The Get_Source_File_Index function is called very frequently. Earlier
344 -- versions cached a single entry, but then reverted to a serial search,
cd38efa5
AC
345 -- and this proved to be a significant source of inefficiency. We then
346 -- switched to using a table with a start point followed by a serial
347 -- search. Now we make sure source buffers are on a reasonable boundary
348 -- (see Types.Source_Align), and we can just use a direct look up in the
349 -- following table.
fbf5a39b
AC
350
351 -- Note that this array is pretty large, but in most operating systems
352 -- it will not be allocated in physical memory unless it is actually used.
353
fbf5a39b 354 Source_File_Index_Table :
cd38efa5 355 array (Int range 0 .. 1 + (Int'Last / Source_Align)) of Source_File_Index;
fbf5a39b
AC
356
357 procedure Set_Source_File_Index_Table (Xnew : Source_File_Index);
358 -- Sets entries in the Source_File_Index_Table for the newly created
359 -- Source_File table entry whose index is Xnew. The Source_First and
360 -- Source_Last fields of this entry must be set before the call.
361
996ae0b0
RK
362 -----------------------
363 -- Checksum Handling --
364 -----------------------
365
366 -- As a source file is scanned, a checksum is computed by taking all the
367 -- non-blank characters in the file, excluding comment characters, the
368 -- minus-minus sequence starting a comment, and all control characters
369 -- except ESC.
370
cfac6e9f
PO
371 -- The checksum algorithm used is the standard CRC-32 algorithm, as
372 -- implemented by System.CRC32, except that we do not bother with the
373 -- final XOR with all 1 bits.
996ae0b0
RK
374
375 -- This algorithm ensures that the checksum includes all semantically
376 -- significant aspects of the program represented by the source file,
377 -- but is insensitive to layout, presence or contents of comments, wide
378 -- character representation method, or casing conventions outside strings.
379
cfac6e9f
PO
380 -- Scans.Checksum is initialized appropriately at the start of scanning
381 -- a file, and copied into the Source_Checksum field of the file table
382 -- entry when the end of file is encountered.
996ae0b0
RK
383
384 -------------------------------------
385 -- Handling Generic Instantiations --
386 -------------------------------------
387
388 -- As described in Sem_Ch12, a generic instantiation involves making a
389 -- copy of the tree of the generic template. The source locations in
390 -- this tree directly reference the source of the template. However it
391 -- is also possible to find the location of the instantiation.
392
393 -- This is achieved as follows. When an instantiation occurs, a new entry
394 -- is made in the source file table. This entry points to the same source
395 -- text, i.e. the file that contains the template, but has a distinct
396 -- set of Source_Ptr index values. The separate range of Sloc values avoids
397 -- confusion, and means that the Sloc values can still be used to uniquely
398 -- identify the source file table entry. It is possible for both entries
399 -- to point to the same text, because of the virtual origin pointers used
400 -- in the source table.
401
cf427f02
AC
402 -- The Instance field of this source file table entry, set
403 -- to No_Instance_Id for normal entries, instead contains a value that
404 -- uniquely identifies a particular instantiation, and the associated
405 -- entry in the Instances table. The source location of the instantiation
406 -- can be retrieved using function Instantiation below. In the case of
407 -- nested instantiations, the Instances table can be used to trace the
408 -- complete chain of nested instantiations.
996ae0b0 409
cf427f02
AC
410 -- Two routines are used to build the special instance entries in the
411 -- source file table. Create_Instantiation_Source is first called to build
996ae0b0
RK
412 -- the virtual source table entry for the instantiation, and then the
413 -- Sloc values in the copy are adjusted using Adjust_Instantiation_Sloc.
414 -- See child unit Sinput.L for details on these two routines.
415
cf427f02
AC
416 generic
417 with procedure Process (Id : Instance_Id; Inst_Sloc : Source_Ptr);
418 procedure Iterate_On_Instances;
419 -- Execute Process for each entry in the instance table
420
421 function Instantiation (S : SFI) return Source_Ptr;
422 -- For a source file entry that represents an inlined body, source location
423 -- of the inlined call. Otherwise, for a source file entry that represents
424 -- a generic instantiation, source location of the instantiation. Returns
425 -- No_Location in all other cases.
426
996ae0b0
RK
427 -----------------
428 -- Global Data --
429 -----------------
430
35debead
EB
431 Current_Source_File : Source_File_Index := No_Source_File;
432 -- Source_File table index of source file currently being scanned.
433 -- Initialized so that some tools (such as gprbuild) can be built with
434 -- -gnatVa and pragma Initialize_Scalars without problems.
996ae0b0
RK
435
436 Current_Source_Unit : Unit_Number_Type;
437 -- Unit number of source file currently being scanned. The special value
438 -- of No_Unit indicates that the configuration pragma file is currently
439 -- being scanned (this has no entry in the unit table).
440
441 Source_gnat_adc : Source_File_Index := No_Source_File;
442 -- This is set if a gnat.adc file is present to reference this file
443
444 Source : Source_Buffer_Ptr;
445 -- Current source (copy of Source_File.Table (Current_Source_File).Source_Text)
446
447 Internal_Source : aliased Source_Buffer (1 .. 81);
448 -- This buffer is used internally in the compiler when the lexical analyzer
449 -- is used to scan a string from within the compiler. The procedure is to
450 -- establish Internal_Source_Ptr as the value of Source, set the string to
451 -- be scanned, appropriately terminated, in this buffer, and set Scan_Ptr
452 -- to point to the start of the buffer. It is a fatal error if the scanner
453 -- signals an error while scanning a token in this internal buffer.
454
455 Internal_Source_Ptr : constant Source_Buffer_Ptr :=
456 Internal_Source'Unrestricted_Access;
457 -- Pointer to internal source buffer
458
8e0aa19b
RD
459 -----------------------------------------
460 -- Handling of Source Line Terminators --
461 -----------------------------------------
462
463 -- In this section we discuss in detail the issue of terminators used to
464 -- terminate source lines. The RM says that one or more format effectors
465 -- (other than horizontal tab) end a source line, and defines the set of
466 -- such format effectors, but does not talk about exactly how they are
467 -- represented in the source program (since in general the RM is not in
468 -- the business of specifying source program formats).
469
470 -- The type Types.Line_Terminator is defined as a subtype of Character
471 -- that includes CR/LF/VT/FF. The most common line enders in practice
472 -- are CR (some MAC systems), LF (Unix systems), and CR/LF (DOS/Windows
473 -- systems). Any of these sequences is recognized as ending a physical
474 -- source line, and if multiple such terminators appear (e.g. LF/LF),
475 -- then we consider we have an extra blank line.
476
477 -- VT and FF are recognized as terminating source lines, but they are
478 -- considered to end a logical line instead of a physical line, so that
479 -- the line numbering ignores such terminators. The use of VT and FF is
480 -- mandated by the standard, and correctly handled in a conforming manner
481 -- by GNAT, but their use is not recommended.
482
483 -- In addition to the set of characters defined by the type in Types, in
484 -- wide character encoding, the codes returning True for a call to
be482a8c
AC
485 -- System.UTF_32.Is_UTF_32_Line_Terminator are also recognized as ending a
486 -- source line. This includes the standard codes defined above in addition
487 -- to NEL (NEXT LINE), LINE SEPARATOR and PARAGRAPH SEPARATOR. Again, as in
488 -- the case of VT and FF, the standard requires we recognize these as line
489 -- terminators, but we consider them to be logical line terminators. The
490 -- only physical line terminators recognized are the standard ones (CR,
491 -- LF, or CR/LF).
8e0aa19b
RD
492
493 -- However, we do not recognize the NEL (16#85#) character as having the
494 -- significance of an end of line character when operating in normal 8-bit
495 -- Latin-n input mode for the compiler. Instead the rule in this mode is
496 -- that all upper half control codes (16#80# .. 16#9F#) are illegal if they
497 -- occur in program text, and are ignored if they appear in comments.
498
499 -- First, note that this behavior is fully conforming with the standard.
500 -- The standard has nothing whatever to say about source representation
501 -- and implementations are completely free to make their own rules. In
502 -- this case, in 8-bit mode, GNAT decides that the 16#0085# character is
503 -- not a representation of the NEL character, even though it looks like it.
504 -- If you have NEL's in your program, which you expect to be treated as
505 -- end of line characters, you must use a wide character encoding such as
506 -- UTF-8 for this code to be recognized.
507
508 -- Second, an explanation of why we take this slightly surprising choice.
509 -- We have never encountered anyone actually using the NEL character to
510 -- end lines. One user raised the issue as a result of some experiments,
511 -- but no one has ever submitted a program encoded this way, in any of
512 -- the possible encodings. It seems that even when using wide character
513 -- codes extensively, the normal approach is to use standard line enders
514 -- (LF or CR/LF). So the failure to recognize NEL in this mode seems to
515 -- have no practical downside.
516
517 -- Moreover, what we have seen in a significant number of programs from
518 -- multiple sources is the practice of writing all program text in lower
519 -- half (ASCII) form, but using UTF-8 encoded wide characters freely in
520 -- comments, where the comments are terminated by normal line endings
521 -- (LF or CR/LF). The comments do not contain NEL codes, but they can and
522 -- do contain other UTF-8 encoding sequences where one of the bytes is the
523 -- NEL code. Now such programs can of course be compiled in UTF-8 mode,
524 -- but in practice they also compile fine in standard 8-bit mode without
525 -- specifying a character encoding. Since this is common practice, it would
526 -- be a significant upwards incompatibility to recognize NEL in 8-bit mode.
527
996ae0b0
RK
528 -----------------
529 -- Subprograms --
530 -----------------
531
532 procedure Backup_Line (P : in out Source_Ptr);
533 -- Back up the argument pointer to the start of the previous line. On
534 -- entry, P points to the start of a physical line in the source buffer.
535 -- On return, P is updated to point to the start of the previous line.
536 -- The caller has checked that a Line_Terminator character precedes P so
537 -- that there definitely is a previous line in the source buffer.
538
539 procedure Build_Location_String (Loc : Source_Ptr);
21d27997
RD
540 -- This procedure builds a string literal of the form "name:line", where
541 -- name is the file name corresponding to Loc, and line is the line number.
542 -- In the event that instantiations are involved, additional suffixes of
543 -- the same form are appended after the separating string " instantiated at
544 -- ". The returned string is appended to the Name_Buffer, terminated by
545 -- ASCII.NUL, with Name_Length indicating the length not including the
546 -- terminating Nul.
996ae0b0 547
beacce02
AC
548 function Build_Location_String (Loc : Source_Ptr) return String;
549 -- Functional form returning a string, which does not include a terminating
550 -- null character. The contents of Name_Buffer are destroyed.
551
260359e3
AC
552 procedure Check_For_BOM;
553 -- Check if the current source starts with a BOM. Scan_Ptr needs to be at
554 -- the start of the current source. If the current source starts with a
555 -- recognized BOM, then some flags such as Wide_Character_Encoding_Method
556 -- are set accordingly, and the Scan_Ptr on return points past this BOM.
557 -- An error message is output and Unrecoverable_Error raised if a non-
558 -- recognized BOM is detected. The call has no effect if no BOM is found.
559
996ae0b0
RK
560 function Get_Column_Number (P : Source_Ptr) return Column_Number;
561 -- The ones-origin column number of the specified Source_Ptr value is
562 -- determined and returned. Tab characters if present are assumed to
563 -- represent the standard 1,9,17.. spacing pattern.
564
565 function Get_Logical_Line_Number
0da2c8ac 566 (P : Source_Ptr) return Logical_Line_Number;
996ae0b0
RK
567 -- The line number of the specified source position is obtained by
568 -- doing a binary search on the source positions in the lines table
569 -- for the unit containing the given source position. The returned
570 -- value is the logical line number, already adjusted for the effect
571 -- of source reference pragmas. If P refers to the line of a source
572 -- reference pragma itself, then No_Line_Number is returned. If no source
573 -- reference pragmas have been encountered, the value returned is
574 -- the same as the physical line number.
575
c775c209
AC
576 function Get_Logical_Line_Number_Img
577 (P : Source_Ptr) return String;
578 -- Same as above function, but returns the line number as a string of
579 -- decimal digits, with no leading space. Destroys Name_Buffer.
580
996ae0b0 581 function Get_Physical_Line_Number
0da2c8ac 582 (P : Source_Ptr) return Physical_Line_Number;
996ae0b0
RK
583 -- The line number of the specified source position is obtained by
584 -- doing a binary search on the source positions in the lines table
585 -- for the unit containing the given source position. The returned
586 -- value is the physical line number in the source being compiled.
587
588 function Get_Source_File_Index (S : Source_Ptr) return Source_File_Index;
cd38efa5 589 pragma Inline (Get_Source_File_Index);
996ae0b0
RK
590 -- Return file table index of file identified by given source pointer
591 -- value. This call must always succeed, since any valid source pointer
592 -- value belongs to some previously loaded source file.
593
594 function Instantiation_Depth (S : Source_Ptr) return Nat;
595 -- Determine instantiation depth for given Sloc value. A value of
596 -- zero means that the given Sloc is not in an instantiation.
597
598 function Line_Start (P : Source_Ptr) return Source_Ptr;
599 -- Finds the source position of the start of the line containing the
600 -- given source location.
601
602 function Line_Start
0da2c8ac
AC
603 (L : Physical_Line_Number;
604 S : Source_File_Index) return Source_Ptr;
996ae0b0
RK
605 -- Finds the source position of the start of the given line in the
606 -- given source file, using a physical line number to identify the line.
607
608 function Num_Source_Lines (S : Source_File_Index) return Nat;
609 -- Returns the number of source lines (this is equivalent to reading
3354f96d 610 -- the value of Last_Source_Line, but returns Nat rather than a
996ae0b0
RK
611 -- physical line number).
612
613 procedure Register_Source_Ref_Pragma
1c28fe3a
RD
614 (File_Name : File_Name_Type;
615 Stripped_File_Name : File_Name_Type;
996ae0b0
RK
616 Mapped_Line : Nat;
617 Line_After_Pragma : Physical_Line_Number);
618 -- Register a source reference pragma, the parameter File_Name is the
619 -- file name from the pragma, and Stripped_File_Name is this name with
620 -- the directory information stripped. Both these parameters are set
621 -- to No_Name if no file name parameter was given in the pragma
622 -- (which can only happen for the second and subsequent pragmas).
623 -- Mapped_Line is the line number parameter from the pragma, and
624 -- Line_After_Pragma is the physical line number of the line that
625 -- follows the line containing the Source_Reference pragma.
626
627 function Original_Location (S : Source_Ptr) return Source_Ptr;
628 -- Given a source pointer S, returns the corresponding source pointer
629 -- value ignoring instantiation copies. For locations that do not
630 -- correspond to instantiation copies of templates, the argument is
631 -- returned unchanged. For locations that do correspond to copies of
632 -- templates from instantiations, the location within the original
633 -- template is returned. This is useful in canonicalizing locations.
634
635 function Instantiation_Location (S : Source_Ptr) return Source_Ptr;
636 pragma Inline (Instantiation_Location);
637 -- Given a source pointer S, returns the corresponding source pointer
638 -- value of the instantiation if this location is within an instance.
639 -- If S is not within an instance, then this returns No_Location.
640
641 function Top_Level_Location (S : Source_Ptr) return Source_Ptr;
642 -- Given a source pointer S, returns the argument unchanged if it is
643 -- not in an instantiation. If S is in an instantiation, then it returns
644 -- the location of the top level instantiation, i.e. the outer level
645 -- instantiation in the nested case.
646
647 function Physical_To_Logical
648 (Line : Physical_Line_Number;
0da2c8ac 649 S : Source_File_Index) return Logical_Line_Number;
996ae0b0
RK
650 -- Given a physical line number in source file whose source index is S,
651 -- return the corresponding logical line number. If the physical line
652 -- number is one containing a Source_Reference pragma, the result will
653 -- be No_Line_Number.
654
655 procedure Skip_Line_Terminators
656 (P : in out Source_Ptr;
657 Physical : out Boolean);
e7d72fb9
AC
658 -- On entry, P points to a line terminator that has been encountered,
659 -- which is one of FF,LF,VT,CR or a wide character sequence whose value is
660 -- in category Separator,Line or Separator,Paragraph. P points just past
661 -- the character that was scanned. The purpose of this routine is to
662 -- distinguish physical and logical line endings. A physical line ending
663 -- is one of:
82c80734
RD
664 --
665 -- CR on its own (MAC System 7)
666 -- LF on its own (Unix and unix-like systems)
667 -- CR/LF (DOS, Windows)
82c80734
RD
668 -- Wide character in Separator,Line or Separator,Paragraph category
669 --
c27f2f15
RD
670 -- Note: we no longer recognize LF/CR (which we did in some earlier
671 -- versions of GNAT). The reason for this is that this sequence is not
672 -- used and recognizing it generated confusion. For example given the
673 -- sequence LF/CR/LF we were interpreting that as (LF/CR) ending the
674 -- first line and a blank line ending with CR following, but it is
675 -- clearly better to interpret this as LF, with a blank line terminated
676 -- by CR/LF, given that LF and CR/LF are both in common use, but no
677 -- system we know of uses LF/CR.
678 --
82c80734
RD
679 -- A logical line ending (that is not a physical line ending) is one of:
680 --
681 -- VT on its own
682 -- FF on its own
683 --
684 -- On return, P is bumped past the line ending sequence (one of the above
685 -- six possibilities). Physical is set to True to indicate that a
686 -- physical end of line was encountered, in which case this routine also
687 -- makes sure that the lines table for the current source file has an
688 -- appropriate entry for the start of the new physical line.
996ae0b0 689
5c39d89f
RD
690 procedure Sloc_Range (N : Node_Id; Min, Max : out Source_Ptr);
691 -- Given a node, returns the minimum and maximum source locations of any
692 -- node in the syntactic subtree for the node. This is not quite the same
693 -- as the locations of the first and last token in the node construct
e7d72fb9
AC
694 -- because parentheses at the outer level do not have a recorded Sloc.
695 --
800da977
AC
696 -- Note: At each step of the tree traversal, we make sure to go back to
697 -- the Original_Node, since this function is concerned about original
698 -- (source) locations.
699 --
e7d72fb9 700 -- Note: if the tree for the expression contains no "real" Sloc values,
800da977
AC
701 -- i.e. values > No_Location, then both Min and Max are set to
702 -- Sloc (Original_Node (N)).
e7d72fb9 703
996ae0b0
RK
704 function Source_Offset (S : Source_Ptr) return Nat;
705 -- Returns the zero-origin offset of the given source location from the
706 -- start of its corresponding unit. This is used for creating canonical
707 -- names in some situations.
708
709 procedure Write_Location (P : Source_Ptr);
710 -- Writes out a string of the form fff:nn:cc, where fff, nn, cc are the
711 -- file name, line number and column corresponding to the given source
712 -- location. No_Location and Standard_Location appear as the strings
713 -- <no location> and <standard location>. If the location is within an
714 -- instantiation, then the instance location is appended, enclosed in
715 -- square brackets (which can nest if necessary). Note that this routine
716 -- is used only for internal compiler debugging output purposes (which
717 -- is why the somewhat cryptic use of brackets is acceptable).
718
719 procedure wl (P : Source_Ptr);
07fc65c4 720 pragma Export (Ada, wl);
996ae0b0
RK
721 -- Equivalent to Write_Location (P); Write_Eol; for calls from GDB
722
723 procedure Write_Time_Stamp (S : Source_File_Index);
724 -- Writes time stamp of specified file in YY-MM-DD HH:MM.SS format
725
996ae0b0 726 procedure Tree_Read;
87b3f81f
AC
727 -- Initializes internal tables from current tree file using the relevant
728 -- Table.Tree_Read routines.
729
730 procedure Tree_Write;
731 -- Writes out internal tables to current tree file using the relevant
732 -- Table.Tree_Write routines.
996ae0b0
RK
733
734private
735 pragma Inline (File_Name);
996ae0b0 736 pragma Inline (Full_File_Name);
cf427f02
AC
737 pragma Inline (File_Type);
738 pragma Inline (Reference_Name);
739 pragma Inline (Full_Ref_Name);
740 pragma Inline (Debug_Source_Name);
741 pragma Inline (Full_Debug_Name);
742 pragma Inline (Instance);
996ae0b0
RK
743 pragma Inline (License);
744 pragma Inline (Num_SRef_Pragmas);
cf427f02
AC
745 pragma Inline (First_Mapped_Line);
746 pragma Inline (Source_Text);
996ae0b0
RK
747 pragma Inline (Source_First);
748 pragma Inline (Source_Last);
996ae0b0 749 pragma Inline (Time_Stamp);
cf427f02
AC
750 pragma Inline (Source_Checksum);
751 pragma Inline (Last_Source_Line);
752 pragma Inline (Keyword_Casing);
753 pragma Inline (Identifier_Casing);
754 pragma Inline (Inlined_Call);
755 pragma Inline (Inlined_Body);
756 pragma Inline (Template);
757 pragma Inline (Unit);
758
759 pragma Inline (Set_Keyword_Casing);
760 pragma Inline (Set_Identifier_Casing);
761
762 pragma Inline (Last_Source_File);
763 pragma Inline (Num_Source_Files);
764 pragma Inline (Num_Source_Lines);
765
766 No_Instance_Id : constant Instance_Id := 0;
996ae0b0
RK
767
768 -------------------------
769 -- Source_Lines Tables --
770 -------------------------
771
772 type Lines_Table_Type is
773 array (Physical_Line_Number) of Source_Ptr;
774 -- Type used for lines table. The entries are indexed by physical line
775 -- numbers. The values are the starting Source_Ptr values for the start
776 -- of the corresponding physical line. Note that we make this a bogus
777 -- big array, sized as required, so that we avoid the use of fat pointers.
778
779 type Lines_Table_Ptr is access all Lines_Table_Type;
780 -- Type used for pointers to line tables
781
782 type Logical_Lines_Table_Type is
783 array (Physical_Line_Number) of Logical_Line_Number;
784 -- Type used for logical lines table. This table is used if a source
785 -- reference pragma is present. It is indexed by physical line numbers,
786 -- and contains the corresponding logical line numbers. An entry that
787 -- corresponds to a source reference pragma is set to No_Line_Number.
788 -- Note that we make this a bogus big array, sized as required, so that
789 -- we avoid the use of fat pointers.
790
791 type Logical_Lines_Table_Ptr is access all Logical_Lines_Table_Type;
9de61fcb 792 -- Type used for pointers to logical line tables
996ae0b0
RK
793
794 -----------------------
795 -- Source_File Table --
796 -----------------------
797
798 -- See earlier descriptions for meanings of public fields
799
800 type Source_File_Record is record
996ae0b0
RK
801 File_Name : File_Name_Type;
802 Reference_Name : File_Name_Type;
803 Debug_Source_Name : File_Name_Type;
fbf5a39b 804 Full_Debug_Name : File_Name_Type;
996ae0b0
RK
805 Full_File_Name : File_Name_Type;
806 Full_Ref_Name : File_Name_Type;
cf427f02 807 Instance : Instance_Id;
996ae0b0
RK
808 Num_SRef_Pragmas : Nat;
809 First_Mapped_Line : Logical_Line_Number;
810 Source_Text : Source_Buffer_Ptr;
811 Source_First : Source_Ptr;
812 Source_Last : Source_Ptr;
996ae0b0
RK
813 Source_Checksum : Word;
814 Last_Source_Line : Physical_Line_Number;
996ae0b0 815 Template : Source_File_Index;
68e2ea27 816 Unit : Unit_Number_Type;
1c28fe3a
RD
817 Time_Stamp : Time_Stamp_Type;
818 File_Type : Type_Of_File;
cf427f02 819 Inlined_Call : Source_Ptr;
1c28fe3a
RD
820 Inlined_Body : Boolean;
821 License : License_Type;
822 Keyword_Casing : Casing_Type;
823 Identifier_Casing : Casing_Type;
996ae0b0
RK
824
825 -- The following fields are for internal use only (i.e. only in the
826 -- body of Sinput or its children, with no direct access by clients).
827
828 Sloc_Adjust : Source_Ptr;
829 -- A value to be added to Sloc values for this file to reference the
830 -- corresponding lines table. This is zero for the non-instantiation
3354f96d 831 -- case, and set so that the addition references the ultimate template
996ae0b0
RK
832 -- for the instantiation case. See Sinput-L for further details.
833
834 Lines_Table : Lines_Table_Ptr;
835 -- Pointer to lines table for this source. Updated as additional
836 -- lines are accessed using the Skip_Line_Terminators procedure.
837 -- Note: the lines table for an instantiation entry refers to the
838 -- original line numbers of the template; see Sinput-L for details.
839
840 Logical_Lines_Table : Logical_Lines_Table_Ptr;
841 -- Pointer to logical lines table for this source. Non-null only if
842 -- a source reference pragma has been processed. Updated as lines
843 -- are accessed using the Skip_Line_Terminators procedure.
844
845 Lines_Table_Max : Physical_Line_Number;
846 -- Maximum subscript values for currently allocated Lines_Table
847 -- and (if present) the allocated Logical_Lines_Table. The value
848 -- Last_Source_Line gives the maximum used value; this gives the
849 -- maximum allocated value.
850
851 end record;
852
1c28fe3a
RD
853 -- The following representation clause ensures that the above record
854 -- has no holes. We do this so that when instances of this record are
855 -- written by Tree_Gen, we do not write uninitialized values to the file.
856
857 AS : constant Pos := Standard'Address_Size;
858
859 for Source_File_Record use record
860 File_Name at 0 range 0 .. 31;
861 Reference_Name at 4 range 0 .. 31;
862 Debug_Source_Name at 8 range 0 .. 31;
863 Full_Debug_Name at 12 range 0 .. 31;
864 Full_File_Name at 16 range 0 .. 31;
865 Full_Ref_Name at 20 range 0 .. 31;
cf427f02 866 Instance at 48 range 0 .. 31;
1c28fe3a
RD
867 Num_SRef_Pragmas at 24 range 0 .. 31;
868 First_Mapped_Line at 28 range 0 .. 31;
869 Source_First at 32 range 0 .. 31;
870 Source_Last at 36 range 0 .. 31;
871 Source_Checksum at 40 range 0 .. 31;
872 Last_Source_Line at 44 range 0 .. 31;
1c28fe3a
RD
873 Template at 52 range 0 .. 31;
874 Unit at 56 range 0 .. 31;
875 Time_Stamp at 60 range 0 .. 8 * Time_Stamp_Length - 1;
876 File_Type at 74 range 0 .. 7;
cf427f02 877 Inlined_Call at 88 range 0 .. 31;
1c28fe3a
RD
878 Inlined_Body at 75 range 0 .. 7;
879 License at 76 range 0 .. 7;
880 Keyword_Casing at 77 range 0 .. 7;
881 Identifier_Casing at 78 range 0 .. 15;
882 Sloc_Adjust at 80 range 0 .. 31;
883 Lines_Table_Max at 84 range 0 .. 31;
884
885 -- The following fields are pointers, so we have to specialize their
886 -- lengths using pointer size, obtained above as Standard'Address_Size.
887
cf427f02
AC
888 Source_Text at 92 range 0 .. AS - 1;
889 Lines_Table at 92 range AS .. AS * 2 - 1;
890 Logical_Lines_Table at 92 range AS * 2 .. AS * 3 - 1;
1c28fe3a
RD
891 end record;
892
cf427f02 893 for Source_File_Record'Size use 92 * 8 + AS * 3;
1c28fe3a
RD
894 -- This ensures that we did not leave out any fields
895
996ae0b0
RK
896 package Source_File is new Table.Table (
897 Table_Component_Type => Source_File_Record,
898 Table_Index_Type => Source_File_Index,
899 Table_Low_Bound => 1,
900 Table_Initial => Alloc.Source_File_Initial,
901 Table_Increment => Alloc.Source_File_Increment,
902 Table_Name => "Source_File");
903
cf427f02
AC
904 -- Auxiliary table containing source location of instantiations. Index 0
905 -- is used for code that does not come from an instance.
906
907 package Instances is new Table.Table (
908 Table_Component_Type => Source_Ptr,
909 Table_Index_Type => Instance_Id,
910 Table_Low_Bound => 0,
911 Table_Initial => Alloc.Source_File_Initial,
912 Table_Increment => Alloc.Source_File_Increment,
913 Table_Name => "Instances");
914
996ae0b0
RK
915 -----------------
916 -- Subprograms --
917 -----------------
918
919 procedure Alloc_Line_Tables
920 (S : in out Source_File_Record;
921 New_Max : Nat);
922 -- Allocate or reallocate the lines table for the given source file so
638e383e 923 -- that it can accommodate at least New_Max lines. Also allocates or
996ae0b0
RK
924 -- reallocates logical lines table if source ref pragmas are present.
925
926 procedure Add_Line_Tables_Entry
927 (S : in out Source_File_Record;
928 P : Source_Ptr);
929 -- Increment line table size by one (reallocating the lines table if
930 -- needed) and set the new entry to contain the value P. Also bumps
931 -- the Last_Source_Line field. If source reference pragmas are
932 -- present, also increments logical lines table size by one, and
933 -- sets new entry.
934
07fc65c4
GB
935 procedure Trim_Lines_Table (S : Source_File_Index);
936 -- Set lines table size for entry S in the source file table to
937 -- correspond to the current value of Num_Source_Lines, releasing
938 -- any unused storage. This is used by Sinput.L and Sinput.D.
939
996ae0b0 940end Sinput;
This page took 4.356314 seconds and 5 git commands to generate.