This is the mail archive of the
fortran@gcc.gnu.org
mailing list for the GNU Fortran project.
[fortran-dev, patch] Parsed format data caching
- From: Jerry DeLisle <jvdelisle at verizon dot net>
- To: Fortran List <fortran at gcc dot gnu dot org>
- Cc: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Sat, 28 Mar 2009 18:18:21 -0700
- Subject: [fortran-dev, patch] Parsed format data caching
Hi folks,
This patch uses the simplest of hashing functions to hash a format string and
save a pointer to the parsed format data and its tokenized fnode tree.
This saves repeated re-parsing of format strings for every instance of a
formatted I/O operation, especially inside loops.
The method is probabilistic, meaning it relies on a good chance you won't
clobber previously saved data. If there is a collision, the previously saved
data is discarded and replaced by the new. The hash table size is fixed with 16
entries.
On some test cases I have seen about 18% speed up in formatted I/O.
Regression tested on x86-64-linux-gnu.
OK for commit to fortran-dev branch? If anyone is curious, the patch will apply
on 4.4 and 4.5 with some fuzz.
Regards,
Jerry
Index: io.h
===================================================================
--- io.h (revision 145046)
+++ io.h (working copy)
@@ -131,6 +131,18 @@ typedef struct array_loop_spec
}
array_loop_spec;
+/* A stucture to build a hash table for format data. */
+
+#define FORMAT_HASH_SIZE 16
+
+typedef struct format_hash_entry
+{
+ char *key;
+ gfc_charlen_type key_len;
+ struct format_data *hashed_fmt;
+}
+format_hash_entry;
+
/* Representation of a namelist object in libgfortran
Namelist Records
@@ -152,7 +164,6 @@ array_loop_spec;
typedef struct namelist_type
{
-
/* Object type, stored as GFC_DTYPE_xxxx. */
bt type;
@@ -623,6 +634,9 @@ typedef struct gfc_unit
int file_len;
char *file;
+
+ /* The format hash table. */
+ struct format_hash_entry format_hash_table[FORMAT_HASH_SIZE];
/* Formatting buffer. */
struct fbuf *fbuf;
@@ -844,9 +858,18 @@ internal_proto(unget_format);
extern void format_error (st_parameter_dt *, const fnode *, const char *);
internal_proto(format_error);
-extern void free_format_data (st_parameter_dt *);
+extern void free_format_data (struct format_data *);
internal_proto(free_format_data);
+extern void free_format_hash_table (gfc_unit *);
+internal_proto(free_format_hash_table);
+
+extern void init_format_hash (st_parameter_dt *);
+internal_proto(init_format_hash);
+
+extern void free_format_hash (st_parameter_dt *);
+internal_proto(free_format_hash);
+
/* transfer.c */
#define SCRATCH_SIZE 300
Index: unit.c
===================================================================
--- unit.c (revision 145046)
+++ unit.c (working copy)
@@ -642,7 +642,8 @@ close_unit_1 (gfc_unit *u, int locked)
free_mem (u->file);
u->file = NULL;
u->file_len = 0;
-
+
+ free_format_hash_table (u);
fbuf_destroy (u);
if (!locked)
Index: transfer.c
===================================================================
--- transfer.c (revision 145046)
+++ transfer.c (working copy)
@@ -3056,7 +3275,8 @@ void
st_read_done (st_parameter_dt *dtp)
{
finalize_transfer (dtp);
- free_format_data (dtp);
+ if (is_internal_unit (dtp))
+ free_format_data (dtp->u.p.fmt);
free_ionml (dtp);
if (dtp->u.p.current_unit != NULL)
unlock_unit (dtp->u.p.current_unit);
@@ -3107,7 +3327,8 @@ st_write_done (st_parameter_dt *dtp)
break;
}
- free_format_data (dtp);
+ if (is_internal_unit (dtp))
+ free_format_data (dtp->u.p.fmt);
free_ionml (dtp);
if (dtp->u.p.current_unit != NULL)
unlock_unit (dtp->u.p.current_unit);
Index: format.c
===================================================================
--- format.c (revision 145046)
+++ format.c (working copy)
@@ -36,6 +36,7 @@ Boston, MA 02110-1301, USA. */
#include "io.h"
#include <ctype.h>
#include <string.h>
+#include <stdbool.h>
#define FARRAY_SIZE 64
@@ -63,7 +64,7 @@ format_data;
static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
NULL };
-/* Error messages */
+/* Error messages. */
static const char posint_required[] = "Positive width required in format",
period_required[] = "Period required in format",
@@ -75,6 +76,129 @@ static const char posint_required[] = "P
reversion_error[] = "Exhausted data descriptors in format",
zero_width[] = "Zero width in format descriptor";
+/* The following routines support caching format data from parsed format strings
+ into a hash table. This avoids repeatedly parsing duplicate format strings
+ or format strings in I/O statements that are repeated in loops. */
+
+
+/* Traverse the table and free all data. */
+
+void
+free_format_hash_table (gfc_unit *u)
+{
+ size_t i;
+
+ /* free_format_data handles any NULL pointers. */
+ for (i = 0; i < FORMAT_HASH_SIZE; i++)
+ {
+ if (u->format_hash_table[i].hashed_fmt != NULL)
+ free_format_data (u->format_hash_table[i].hashed_fmt);
+ u->format_hash_table[i].hashed_fmt = NULL;
+ }
+}
+
+/* Traverse the format_data structure and reset the fnode counters. */
+
+static void
+reset_node (fnode *fn)
+{
+ fnode *f;
+
+ fn->count = 0;
+ fn->current = NULL;
+
+ if (fn->format != FMT_LPAREN)
+ return;
+
+ for (f = fn->u.child; f; f = f->next)
+ {
+ if (f->format == FMT_RPAREN)
+ break;
+ reset_node (f);
+ }
+}
+
+static void
+reset_fnode_counters (st_parameter_dt *dtp)
+{
+ fnode *f;
+ format_data *fmt;
+
+ fmt = dtp->u.p.fmt;
+
+ /* Clear this pointer at the head so things start at the right place. */
+ fmt->array.array[0].current = NULL;
+
+ for (f = fmt->last->array[0].u.child; f; f = f->next)
+ reset_node (f);
+}
+
+
+/* A simple hashing function to generate an index into the hash table. */
+
+static inline
+uint32_t format_hash (st_parameter_dt *dtp)
+{
+ char *key;
+ size_t key_len;
+ uint32_t hash = 0;
+ size_t i;
+
+ /* Hash the format string. Super simple, but what the heck! */
+ key = dtp->format;
+ key_len = dtp->format_len;
+ for (i = 0; i < key_len; i++)
+ hash ^= key[i];
+ hash &= (FORMAT_HASH_SIZE - 1);
+ return hash;
+}
+
+
+static void
+save_parsed_format (st_parameter_dt *dtp)
+{
+ uint32_t hash;
+ gfc_unit *u;
+
+ hash = format_hash (dtp);
+ u = dtp->u.p.current_unit;
+
+ /* Index into the hash table. We are simply replacing whatever is there
+ relying on probability. */
+ if (u->format_hash_table[hash].hashed_fmt != NULL)
+ free_format_data (u->format_hash_table[hash].hashed_fmt);
+ u->format_hash_table[hash].hashed_fmt = NULL;
+
+ u->format_hash_table[hash].key = dtp->format;
+ u->format_hash_table[hash].key_len = dtp->format_len;
+ u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
+}
+
+
+static format_data *
+find_parsed_format (st_parameter_dt *dtp)
+{
+ uint32_t hash;
+ gfc_unit *u;
+
+ hash = format_hash (dtp);
+ u = dtp->u.p.current_unit;
+
+ if (u->format_hash_table[hash].key != NULL)
+ {
+ /* See if it matches. */
+ if (u->format_hash_table[hash].key_len == dtp->format_len)
+ {
+ /* So far so good. */
+ if (strncmp (u->format_hash_table[hash].key,
+ dtp->format, dtp->format_len) == 0)
+ return u->format_hash_table[hash].hashed_fmt;
+ }
+ }
+ return NULL;
+}
+
+
/* next_char()-- Return the next character in the format string.
* Returns -1 when the string is done. If the literal flag is set,
* spaces are significant, otherwise they are not. */
@@ -90,7 +214,8 @@ next_char (format_data *fmt, int literal
return -1;
fmt->format_string_len--;
- fmt->error_element = c = toupper (*fmt->format_string++);
+ c = toupper (*fmt->format_string++);
+ fmt->error_element = c;
}
while ((c == ' ' || c == '\t') && !literal);
@@ -141,10 +266,10 @@ get_fnode (format_data *fmt, fnode **hea
/* free_format_data()-- Free all allocated format data. */
void
-free_format_data (st_parameter_dt *dtp)
+free_format_data (format_data *fmt)
{
fnode_array *fa, *fa_next;
- format_data *fmt = dtp->u.p.fmt;
+
if (fmt == NULL)
return;
@@ -156,7 +281,7 @@ free_format_data (st_parameter_dt *dtp)
}
free_mem (fmt);
- dtp->u.p.fmt = NULL;
+ fmt = NULL;
}
@@ -184,6 +309,14 @@ format_lex (format_data *fmt)
switch (c)
{
+ case '(':
+ token = FMT_LPAREN;
+ break;
+
+ case ')':
+ token = FMT_RPAREN;
+ break;
+
case '-':
negative_flag = 1;
/* Fall Through */
@@ -276,14 +409,6 @@ format_lex (format_data *fmt)
break;
- case '(':
- token = FMT_LPAREN;
- break;
-
- case ')':
- token = FMT_RPAREN;
- break;
-
case 'X':
token = FMT_X;
break;
@@ -455,8 +580,10 @@ parse_format_list (st_parameter_dt *dtp)
format_token t, u, t2;
int repeat;
format_data *fmt = dtp->u.p.fmt;
+ bool save_format;
head = tail = NULL;
+ save_format = !is_internal_unit (dtp);
/* Get the next format item */
format_item:
@@ -567,6 +694,7 @@ parse_format_list (st_parameter_dt *dtp)
case FMT_DP:
notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
"descriptor not allowed");
+ save_format = true;
/* Fall through. */
case FMT_S:
case FMT_SS:
@@ -592,6 +720,7 @@ parse_format_list (st_parameter_dt *dtp)
get_fnode (fmt, &head, &tail, FMT_DOLLAR);
tail->repeat = 1;
notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
+ save_format = false;
goto between_desc;
@@ -689,6 +818,7 @@ parse_format_list (st_parameter_dt *dtp)
fmt->saved_token = t;
fmt->value = 1; /* Default width */
notify_std (&dtp->common, GFC_STD_GNU, posint_required);
+ save_format = false;
}
}
@@ -999,6 +1129,33 @@ format_error (st_parameter_dt *dtp, cons
}
+/* revert()-- Do reversion of the format. Control reverts to the left
+ * parenthesis that matches the rightmost right parenthesis. From our
+ * tree structure, we are looking for the rightmost parenthesis node
+ * at the second level, the first level always being a single
+ * parenthesis node. If this node doesn't exit, we use the top
+ * level. */
+
+static void
+revert (st_parameter_dt *dtp)
+{
+ fnode *f, *r;
+ format_data *fmt = dtp->u.p.fmt;
+
+ dtp->u.p.reversion_flag = 1;
+
+ r = NULL;
+
+ for (f = fmt->array.array[0].u.child; f; f = f->next)
+ if (f->format == FMT_LPAREN)
+ r = f;
+
+ /* If r is NULL because no node was found, the whole tree will be used */
+
+ fmt->array.array[0].current = r;
+ fmt->array.array[0].count = 0;
+}
+
/* parse_format()-- Parse a format string. */
void
@@ -1006,6 +1163,21 @@ parse_format (st_parameter_dt *dtp)
{
format_data *fmt;
+ /* Lookup format string to see if it has already been parsed. */
+
+ dtp->u.p.fmt = find_parsed_format (dtp);
+
+ if (dtp->u.p.fmt != NULL)
+ {
+ dtp->u.p.fmt->reversion_ok = 0;
+ dtp->u.p.fmt->saved_token = FMT_NONE;
+ dtp->u.p.fmt->saved_format = NULL;
+ reset_fnode_counters (dtp);
+ return;
+ }
+
+ /* Not found so proceed as follows. */
+
dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
fmt->format_string = dtp->format;
fmt->format_string_len = dtp->format_len;
@@ -1037,35 +1209,12 @@ parse_format (st_parameter_dt *dtp)
fmt->error = "Missing initial left parenthesis in format";
if (fmt->error)
- format_error (dtp, NULL, fmt->error);
-}
-
-
-/* revert()-- Do reversion of the format. Control reverts to the left
- * parenthesis that matches the rightmost right parenthesis. From our
- * tree structure, we are looking for the rightmost parenthesis node
- * at the second level, the first level always being a single
- * parenthesis node. If this node doesn't exit, we use the top
- * level. */
-
-static void
-revert (st_parameter_dt *dtp)
-{
- fnode *f, *r;
- format_data *fmt = dtp->u.p.fmt;
-
- dtp->u.p.reversion_flag = 1;
-
- r = NULL;
-
- for (f = fmt->array.array[0].u.child; f; f = f->next)
- if (f->format == FMT_LPAREN)
- r = f;
-
- /* If r is NULL because no node was found, the whole tree will be used */
-
- fmt->array.array[0].current = r;
- fmt->array.array[0].count = 0;
+ {
+ format_error (dtp, NULL, fmt->error);
+ free_format_hash_table (dtp->u.p.current_unit);
+ return;
+ }
+ save_parsed_format (dtp);
}