This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch, fortran] PR 25708 Reduce seeks when loading module files
On Wed, Nov 30, 2011 at 22:36, Mikael Morin <mikael.morin@sfr.fr> wrote:
> Hi,
>
>>
>> modseek1.diff
>> Â diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
>> index 70f8565..982425d 100644
>> --- a/gcc/fortran/module.c
>> +++ b/gcc/fortran/module.c
>> @@ -1069,51 +1069,49 @@ module_unget_char (void)
>> Âstatic void
>> Âparse_string (void)
[snip]
> I think you are changing the behaviour here. Single quotes in a string are
> encoded as two single quotes in a row. So when decoding, only one of them
> should be kept.
> Honestly, it may well be dead/unused code, but it's something we don't want to
> change at this point.
Ah, I misunderstood the existing code. But this is good, as it
simplifies my patch! :)
>> @@ -1293,22 +1291,15 @@ peek_atom (void)
[snip]
> I'm certainly nitpicking here, but for me it's better to have the file
> position before the unexpected atom than after it when throwing the error in
> bad_module.
> To save the call to fgetpos, (through get_module_locus) one could save/restore
> module_column and module_line as the call to bad_module doesn't need anything
> else.
Good point. Updated patch does this.
> About the rest of the patch, it's unfortunate that it makes the code slightly
> more difficult to read [this is my nitpicking day]. I think it's better to
> optimize peek_atom: the first character is sufficient to distinguish between
> atom types, so ungetc/module_unget_char can be used, even for strings, names
> or numbers.
Aaargh! Why didn't I think of this, instead spending most of the
evening trying to get rid of further peek_atom() calls! This idea is
much simpler and fixes all peek_atom() calls in one go.
With the updated patch, the number of lseek's when compiling
aermod.f90 drop to 380000, which is a factor of 15 reduction compared
to the current trunk. And a factor of 55 compared to trunk a few days
ago before Thomas' patch.
Updated patch attached. Regtested on x86_64-unknown-linux-gnu, Ok for trunk?
2011-11-31 Janne Blomqvist <jb@gcc.gnu.org>
PR fortran/25708
* module.c (parse_string): Read string into resizable array
instead of parsing twice and seeking.
(peek_atom): New implementation avoiding seeks.
(require_atom): Save and set column and line explicitly for error
handling.
--
Janne Blomqvist
diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
index 70f8565..4954888 100644
--- a/gcc/fortran/module.c
+++ b/gcc/fortran/module.c
@@ -1069,51 +1069,37 @@ module_unget_char (void)
static void
parse_string (void)
{
- module_locus start;
- int len, c;
- char *p;
-
- get_module_locus (&start);
+ int c;
+ size_t cursz = 30;
+ size_t len = 0;
- len = 0;
+ atom_string = XNEWVEC (char, cursz);
- /* See how long the string is. */
for ( ; ; )
{
c = module_char ();
- if (c == EOF)
- bad_module ("Unexpected end of module in string constant");
- if (c != '\'')
+ if (c == '\'')
{
- len++;
- continue;
+ int c2 = module_char ();
+ if (c2 != '\'')
+ {
+ module_unget_char ();
+ break;
+ }
}
- c = module_char ();
- if (c == '\'')
+ if (len >= cursz)
{
- len++;
- continue;
+ cursz *= 2;
+ atom_string = XRESIZEVEC (char, atom_string, cursz);
}
-
- break;
+ atom_string[len] = c;
+ len++;
}
- set_module_locus (&start);
-
- atom_string = p = XCNEWVEC (char, len + 1);
-
- for (; len > 0; len--)
- {
- c = module_char ();
- if (c == '\'')
- module_char (); /* Guaranteed to be another \'. */
- *p++ = c;
- }
-
- module_char (); /* Terminating \'. */
- *p = '\0'; /* C-style string for debug purposes. */
+ atom_string = XRESIZEVEC (char, atom_string, len + 1);
+ atom_string[len] = '\0'; /* C-style string for debug purposes. */
}
@@ -1279,17 +1265,100 @@ parse_atom (void)
static atom_type
peek_atom (void)
{
- module_locus m;
+ int c;
atom_type a;
- get_module_locus (&m);
+ do
+ {
+ c = module_char ();
+ }
+ while (c == ' ' || c == '\r' || c == '\n');
+
+ switch (c)
+ {
+ case '(':
+ module_unget_char ();
+ return ATOM_LPAREN;
+
+ case ')':
+ module_unget_char ();
+ return ATOM_RPAREN;
- a = parse_atom ();
- if (a == ATOM_STRING)
- free (atom_string);
+ case '\'':
+ module_unget_char ();
+ return ATOM_STRING;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ module_unget_char ();
+ return ATOM_INTEGER;
+
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ module_unget_char ();
+ return ATOM_NAME;
- set_module_locus (&m);
- return a;
+ default:
+ bad_module ("Bad name");
+ }
}
@@ -1299,11 +1368,12 @@ peek_atom (void)
static void
require_atom (atom_type type)
{
- module_locus m;
atom_type t;
const char *p;
+ int column, line;
- get_module_locus (&m);
+ column = module_column;
+ line = module_line;
t = parse_atom ();
if (t != type)
@@ -1329,7 +1399,8 @@ require_atom (atom_type type)
gfc_internal_error ("require_atom(): bad atom type required");
}
- set_module_locus (&m);
+ module_column = column;
+ module_line = line;
bad_module (p);
}
}