This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[fortran-dev, patch] Parsed format data caching


Hi folks,

This patch uses the simplest of hashing functions to hash a format string and save a pointer to the parsed format data and its tokenized fnode tree.

This saves repeated re-parsing of format strings for every instance of a formatted I/O operation, especially inside loops.

The method is probabilistic, meaning it relies on a good chance you won't clobber previously saved data. If there is a collision, the previously saved data is discarded and replaced by the new. The hash table size is fixed with 16 entries.

On some test cases I have seen about 18% speed up in formatted I/O.

Regression tested on x86-64-linux-gnu.

OK for commit to fortran-dev branch? If anyone is curious, the patch will apply on 4.4 and 4.5 with some fuzz.

Regards,

Jerry
Index: io.h
===================================================================
--- io.h	(revision 145046)
+++ io.h	(working copy)
@@ -131,6 +131,18 @@ typedef struct array_loop_spec
 }
 array_loop_spec;
 
+/* A stucture to build a hash table for format data.  */
+
+#define FORMAT_HASH_SIZE 16 
+
+typedef struct format_hash_entry
+{
+  char *key;
+  gfc_charlen_type key_len;
+  struct format_data *hashed_fmt;
+}
+format_hash_entry;
+
 /* Representation of a namelist object in libgfortran
 
    Namelist Records
@@ -152,7 +164,6 @@ array_loop_spec;
 
 typedef struct namelist_type
 {
-
   /* Object type, stored as GFC_DTYPE_xxxx.  */
   bt type;
 
@@ -623,6 +634,9 @@ typedef struct gfc_unit
 
   int file_len;
   char *file;
+
+  /* The format hash table.  */
+  struct format_hash_entry format_hash_table[FORMAT_HASH_SIZE];
   
   /* Formatting buffer.  */
   struct fbuf *fbuf;
@@ -844,9 +858,18 @@ internal_proto(unget_format);
 extern void format_error (st_parameter_dt *, const fnode *, const char *);
 internal_proto(format_error);
 
-extern void free_format_data (st_parameter_dt *);
+extern void free_format_data (struct format_data *);
 internal_proto(free_format_data);
 
+extern void free_format_hash_table (gfc_unit *);
+internal_proto(free_format_hash_table);
+
+extern void init_format_hash (st_parameter_dt *);
+internal_proto(init_format_hash);
+
+extern void free_format_hash (st_parameter_dt *);
+internal_proto(free_format_hash);
+
 /* transfer.c */
 
 #define SCRATCH_SIZE 300
Index: unit.c
===================================================================
--- unit.c	(revision 145046)
+++ unit.c	(working copy)
@@ -642,7 +642,8 @@ close_unit_1 (gfc_unit *u, int locked)
     free_mem (u->file);
   u->file = NULL;
   u->file_len = 0;
-  
+
+  free_format_hash_table (u);  
   fbuf_destroy (u);
 
   if (!locked)
Index: transfer.c
===================================================================
--- transfer.c	(revision 145046)
+++ transfer.c	(working copy)
@@ -3056,7 +3275,8 @@ void
 st_read_done (st_parameter_dt *dtp)
 {
   finalize_transfer (dtp);
-  free_format_data (dtp);
+  if (is_internal_unit (dtp))
+    free_format_data (dtp->u.p.fmt);
   free_ionml (dtp);
   if (dtp->u.p.current_unit != NULL)
     unlock_unit (dtp->u.p.current_unit);
@@ -3107,7 +3327,8 @@ st_write_done (st_parameter_dt *dtp)
 	break;
       }
 
-  free_format_data (dtp);
+  if (is_internal_unit (dtp))
+    free_format_data (dtp->u.p.fmt);
   free_ionml (dtp);
   if (dtp->u.p.current_unit != NULL)
     unlock_unit (dtp->u.p.current_unit);
Index: format.c
===================================================================
--- format.c	(revision 145046)
+++ format.c	(working copy)
@@ -36,6 +36,7 @@ Boston, MA 02110-1301, USA.  */
 #include "io.h"
 #include <ctype.h>
 #include <string.h>
+#include <stdbool.h>
 
 #define FARRAY_SIZE 64
 
@@ -63,7 +64,7 @@ format_data;
 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
 				  NULL };
 
-/* Error messages */
+/* Error messages. */
 
 static const char posint_required[] = "Positive width required in format",
   period_required[] = "Period required in format",
@@ -75,6 +76,129 @@ static const char posint_required[] = "P
   reversion_error[] = "Exhausted data descriptors in format",
   zero_width[] = "Zero width in format descriptor";
 
+/* The following routines support caching format data from parsed format strings
+   into a hash table.  This avoids repeatedly parsing duplicate format strings
+   or format strings in I/O statements that are repeated in loops.  */
+
+
+/* Traverse the table and free all data.  */
+
+void
+free_format_hash_table (gfc_unit *u)
+{
+  size_t i;
+
+  /* free_format_data handles any NULL pointers.  */
+  for (i = 0; i < FORMAT_HASH_SIZE; i++)
+    {
+      if (u->format_hash_table[i].hashed_fmt != NULL)
+	free_format_data (u->format_hash_table[i].hashed_fmt);
+      u->format_hash_table[i].hashed_fmt = NULL;
+    }
+}
+
+/* Traverse the format_data structure and reset the fnode counters.  */
+
+static void
+reset_node (fnode *fn)
+{
+  fnode *f;
+
+  fn->count = 0;
+  fn->current = NULL;
+  
+  if (fn->format != FMT_LPAREN)
+    return;
+
+  for (f = fn->u.child; f; f = f->next)
+    {
+      if (f->format == FMT_RPAREN)
+	break;
+      reset_node (f);
+    }
+}
+
+static void
+reset_fnode_counters (st_parameter_dt *dtp)
+{
+  fnode *f;
+  format_data *fmt;
+
+  fmt = dtp->u.p.fmt;
+
+  /* Clear this pointer at the head so things start at the right place.  */
+  fmt->array.array[0].current = NULL;
+
+  for (f = fmt->last->array[0].u.child; f; f = f->next)
+    reset_node (f);
+}
+
+
+/* A simple hashing function to generate an index into the hash table.  */
+
+static inline
+uint32_t format_hash (st_parameter_dt *dtp)
+{
+  char *key;
+  size_t key_len;
+  uint32_t hash = 0;
+  size_t i;
+
+  /* Hash the format string. Super simple, but what the heck!  */
+  key = dtp->format;
+  key_len = dtp->format_len;
+  for (i = 0; i < key_len; i++)
+    hash ^= key[i];
+  hash &= (FORMAT_HASH_SIZE - 1);
+  return hash;
+}
+
+
+static void
+save_parsed_format (st_parameter_dt *dtp)
+{
+  uint32_t hash;
+  gfc_unit *u;
+
+  hash = format_hash (dtp);
+  u = dtp->u.p.current_unit;
+
+  /* Index into the hash table.  We are simply replacing whatever is there
+     relying on probability.  */
+  if (u->format_hash_table[hash].hashed_fmt != NULL)
+    free_format_data (u->format_hash_table[hash].hashed_fmt);
+  u->format_hash_table[hash].hashed_fmt = NULL;
+
+  u->format_hash_table[hash].key = dtp->format;
+  u->format_hash_table[hash].key_len = dtp->format_len;
+  u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
+}
+
+
+static format_data *
+find_parsed_format (st_parameter_dt *dtp)
+{
+  uint32_t hash;
+  gfc_unit *u;
+
+  hash = format_hash (dtp);
+  u = dtp->u.p.current_unit;
+
+  if (u->format_hash_table[hash].key != NULL)
+    {
+      /* See if it matches.  */
+      if (u->format_hash_table[hash].key_len == dtp->format_len)
+	{
+	  /* So far so good.  */
+	  if (strncmp (u->format_hash_table[hash].key,
+	      dtp->format, dtp->format_len) == 0)
+	    return u->format_hash_table[hash].hashed_fmt;
+	}
+    }
+  return NULL;
+}
+
+
 /* next_char()-- Return the next character in the format string.
  * Returns -1 when the string is done.  If the literal flag is set,
  * spaces are significant, otherwise they are not. */
@@ -90,7 +214,8 @@ next_char (format_data *fmt, int literal
 	return -1;
 
       fmt->format_string_len--;
-      fmt->error_element = c = toupper (*fmt->format_string++);
+      c = toupper (*fmt->format_string++);
+      fmt->error_element = c;
     }
   while ((c == ' ' || c == '\t') && !literal);
 
@@ -141,10 +266,10 @@ get_fnode (format_data *fmt, fnode **hea
 /* free_format_data()-- Free all allocated format data.  */
 
 void
-free_format_data (st_parameter_dt *dtp)
+free_format_data (format_data *fmt)
 {
   fnode_array *fa, *fa_next;
-  format_data *fmt = dtp->u.p.fmt;
+
 
   if (fmt == NULL)
     return;
@@ -156,7 +281,7 @@ free_format_data (st_parameter_dt *dtp)
     }
 
   free_mem (fmt);
-  dtp->u.p.fmt = NULL;
+  fmt = NULL;
 }
 
 
@@ -184,6 +309,14 @@ format_lex (format_data *fmt)
 
   switch (c)
     {
+    case '(':
+      token = FMT_LPAREN;
+      break;
+
+    case ')':
+      token = FMT_RPAREN;
+      break;
+
     case '-':
       negative_flag = 1;
       /* Fall Through */
@@ -276,14 +409,6 @@ format_lex (format_data *fmt)
 
       break;
 
-    case '(':
-      token = FMT_LPAREN;
-      break;
-
-    case ')':
-      token = FMT_RPAREN;
-      break;
-
     case 'X':
       token = FMT_X;
       break;
@@ -455,8 +580,10 @@ parse_format_list (st_parameter_dt *dtp)
   format_token t, u, t2;
   int repeat;
   format_data *fmt = dtp->u.p.fmt;
+  bool save_format;
 
   head = tail = NULL;
+  save_format = !is_internal_unit (dtp);
 
   /* Get the next format item */
  format_item:
@@ -567,6 +694,7 @@ parse_format_list (st_parameter_dt *dtp)
     case FMT_DP:
       notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
 		  "descriptor not allowed");
+      save_format = true;
     /* Fall through.  */
     case FMT_S:
     case FMT_SS:
@@ -592,6 +720,7 @@ parse_format_list (st_parameter_dt *dtp)
       get_fnode (fmt, &head, &tail, FMT_DOLLAR);
       tail->repeat = 1;
       notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
+      save_format = false;
       goto between_desc;
 
 
@@ -689,6 +818,7 @@ parse_format_list (st_parameter_dt *dtp)
 	      fmt->saved_token = t;
 	      fmt->value = 1;	/* Default width */
 	      notify_std (&dtp->common, GFC_STD_GNU, posint_required);
+	      save_format = false;
 	    }
 	}
 
@@ -999,6 +1129,33 @@ format_error (st_parameter_dt *dtp, cons
 }
 
 
+/* revert()-- Do reversion of the format.  Control reverts to the left
+ * parenthesis that matches the rightmost right parenthesis.  From our
+ * tree structure, we are looking for the rightmost parenthesis node
+ * at the second level, the first level always being a single
+ * parenthesis node.  If this node doesn't exit, we use the top
+ * level. */
+
+static void
+revert (st_parameter_dt *dtp)
+{
+  fnode *f, *r;
+  format_data *fmt = dtp->u.p.fmt;
+
+  dtp->u.p.reversion_flag = 1;
+
+  r = NULL;
+
+  for (f = fmt->array.array[0].u.child; f; f = f->next)
+    if (f->format == FMT_LPAREN)
+      r = f;
+
+  /* If r is NULL because no node was found, the whole tree will be used */
+
+  fmt->array.array[0].current = r;
+  fmt->array.array[0].count = 0;
+}
+
 /* parse_format()-- Parse a format string.  */
 
 void
@@ -1006,6 +1163,21 @@ parse_format (st_parameter_dt *dtp)
 {
   format_data *fmt;
 
+  /* Lookup format string to see if it has already been parsed.  */
+
+  dtp->u.p.fmt = find_parsed_format (dtp);
+
+  if (dtp->u.p.fmt != NULL)
+    {
+      dtp->u.p.fmt->reversion_ok = 0;
+      dtp->u.p.fmt->saved_token = FMT_NONE;
+      dtp->u.p.fmt->saved_format = NULL;
+      reset_fnode_counters (dtp);
+      return;
+    }
+
+  /* Not found so proceed as follows.  */
+
   dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
   fmt->format_string = dtp->format;
   fmt->format_string_len = dtp->format_len;
@@ -1037,35 +1209,12 @@ parse_format (st_parameter_dt *dtp)
     fmt->error = "Missing initial left parenthesis in format";
 
   if (fmt->error)
-    format_error (dtp, NULL, fmt->error);
-}
-
-
-/* revert()-- Do reversion of the format.  Control reverts to the left
- * parenthesis that matches the rightmost right parenthesis.  From our
- * tree structure, we are looking for the rightmost parenthesis node
- * at the second level, the first level always being a single
- * parenthesis node.  If this node doesn't exit, we use the top
- * level. */
-
-static void
-revert (st_parameter_dt *dtp)
-{
-  fnode *f, *r;
-  format_data *fmt = dtp->u.p.fmt;
-
-  dtp->u.p.reversion_flag = 1;
-
-  r = NULL;
-
-  for (f = fmt->array.array[0].u.child; f; f = f->next)
-    if (f->format == FMT_LPAREN)
-      r = f;
-
-  /* If r is NULL because no node was found, the whole tree will be used */
-
-  fmt->array.array[0].current = r;
-  fmt->array.array[0].count = 0;
+    {
+      format_error (dtp, NULL, fmt->error);
+      free_format_hash_table (dtp->u.p.current_unit);
+      return;
+    }
+  save_parsed_format (dtp);
 }
 
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]