From facb23dc4ec9ced099de4b5a7a147ab8b972fa23 Mon Sep 17 00:00:00 2001 From: Janne Blomqvist Date: Wed, 23 Jan 2013 23:56:54 +0200 Subject: [PATCH] Improve performance of byte-swapped I/O. 2013-01-23 Janne Blomqvist * io/file_pos.c (unformatted_backspace): Use __builtin_bswapXX instead of reverse_memcpy. * io/io.h (reverse_memcpy): Remove prototype. * io/transfer.c (reverse_memcpy): Make static, move towards beginning of file. (bswap_array): New function. (unformatted_read): Use bswap_array to byte swap the data in-place. (unformatted_write): Use a larger temp buffer and bswap_array. (us_read): Use __builtin_bswapXX instead of reverse_memcpy. (write_us_marker): Likewise. From-SVN: r195413 --- libgfortran/ChangeLog | 14 +++ libgfortran/io/file_pos.c | 10 +- libgfortran/io/io.h | 3 - libgfortran/io/transfer.c | 219 +++++++++++++++++++++++++++----------- 4 files changed, 178 insertions(+), 68 deletions(-) diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index 887e2eb86705..d4ccff4f2a6b 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,17 @@ +2013-01-23 Janne Blomqvist + + * io/file_pos.c (unformatted_backspace): Use __builtin_bswapXX + instead of reverse_memcpy. + * io/io.h (reverse_memcpy): Remove prototype. + * io/transfer.c (reverse_memcpy): Make static, move towards + beginning of file. + (bswap_array): New function. + (unformatted_read): Use bswap_array to byte swap the data + in-place. + (unformatted_write): Use a larger temp buffer and bswap_array. + (us_read): Use __builtin_bswapXX instead of reverse_memcpy. + (write_us_marker): Likewise. + 2013-01-14 Richard Sandiford Update copyright years. diff --git a/libgfortran/io/file_pos.c b/libgfortran/io/file_pos.c index cf68c9e2f366..8b4fda3dd91b 100644 --- a/libgfortran/io/file_pos.c +++ b/libgfortran/io/file_pos.c @@ -139,15 +139,21 @@ unformatted_backspace (st_parameter_filepos *fpp, gfc_unit *u) } else { + uint32_t u32; + uint64_t u64; switch (length) { case sizeof(GFC_INTEGER_4): - reverse_memcpy (&m4, p, sizeof (m4)); + memcpy (&u32, p, sizeof (u32)); + u32 = __builtin_bswap32 (u32); + memcpy (&m4, &u32, sizeof (m4)); m = m4; break; case sizeof(GFC_INTEGER_8): - reverse_memcpy (&m8, p, sizeof (m8)); + memcpy (&u64, p, sizeof (u64)); + u64 = __builtin_bswap64 (u64); + memcpy (&m8, &u64, sizeof (m8)); m = m8; break; diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h index 6d9baac9c0c6..8ea9326670b5 100644 --- a/libgfortran/io/io.h +++ b/libgfortran/io/io.h @@ -647,9 +647,6 @@ internal_proto(init_loop_spec); extern void next_record (st_parameter_dt *, int); internal_proto(next_record); -extern void reverse_memcpy (void *, const void *, size_t); -internal_proto (reverse_memcpy); - extern void st_wait (st_parameter_wait *); export_proto(st_wait); diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c index 9d2956dc9a38..515c34f378ed 100644 --- a/libgfortran/io/transfer.c +++ b/libgfortran/io/transfer.c @@ -877,50 +877,138 @@ write_buf (st_parameter_dt *dtp, void *buf, size_t nbytes) } -/* Master function for unformatted reads. */ +/* Reverse memcpy - used for byte swapping. */ static void -unformatted_read (st_parameter_dt *dtp, bt type, - void *dest, int kind, size_t size, size_t nelems) +reverse_memcpy (void *dest, const void *src, size_t n) { - if (likely (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_NATIVE) - || kind == 1) + char *d, *s; + size_t i; + + d = (char *) dest; + s = (char *) src + n - 1; + + /* Write with ascending order - this is likely faster + on modern architectures because of write combining. */ + for (i=0; iu.p.current_unit->flags.convert == GFC_CONVERT_SWAP) + && kind != 1) + { /* Handle wide chracters. */ - if (type == BT_CHARACTER && kind != 1) - { - nelems *= size; - size = kind; - } + if (type == BT_CHARACTER) + { + nelems *= size; + size = kind; + } /* Break up complex into its constituent reals. */ - if (type == BT_COMPLEX) - { - nelems *= 2; - size /= 2; - } - - /* By now, all complex variables have been split into their - constituent reals. */ - - for (i = 0; i < nelems; i++) - { - read_block_direct (dtp, buffer, size); - reverse_memcpy (p, buffer, size); - p += size; - } + else if (type == BT_COMPLEX) + { + nelems *= 2; + size /= 2; + } + bswap_array (dest, dest, size, nelems); } } @@ -944,9 +1032,10 @@ unformatted_write (st_parameter_dt *dtp, bt type, } else { - char buffer[16]; +#define BSWAP_BUFSZ 512 + char buffer[BSWAP_BUFSZ]; char *p; - size_t i; + size_t nrem; p = source; @@ -967,12 +1056,21 @@ unformatted_write (st_parameter_dt *dtp, bt type, /* By now, all complex variables have been split into their constituent reals. */ - for (i = 0; i < nelems; i++) + nrem = nelems; + do { - reverse_memcpy(buffer, p, size); - p += size; - write_buf (dtp, buffer, size); + size_t nc; + if (size * nrem > BSWAP_BUFSZ) + nc = BSWAP_BUFSZ / size; + else + nc = nrem; + + bswap_array (buffer, p, size, nc); + write_buf (dtp, buffer, size * nc); + p += size * nc; + nrem -= nc; } + while (nrem > 0); } } @@ -2152,15 +2250,22 @@ us_read (st_parameter_dt *dtp, int continued) } } else + { + uint32_t u32; + uint64_t u64; switch (nr) { case sizeof(GFC_INTEGER_4): - reverse_memcpy (&i4, &i, sizeof (i4)); + memcpy (&u32, &i, sizeof (u32)); + u32 = __builtin_bswap32 (u32); + memcpy (&i4, &u32, sizeof (i4)); i = i4; break; case sizeof(GFC_INTEGER_8): - reverse_memcpy (&i8, &i, sizeof (i8)); + memcpy (&u64, &i, sizeof (u64)); + u64 = __builtin_bswap64 (u64); + memcpy (&i8, &u64, sizeof (i8)); i = i8; break; @@ -2168,6 +2273,7 @@ us_read (st_parameter_dt *dtp, int continued) runtime_error ("Illegal value for record marker"); break; } + } if (i >= 0) { @@ -3035,7 +3141,6 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf) size_t len; GFC_INTEGER_4 buf4; GFC_INTEGER_8 buf8; - char p[sizeof (GFC_INTEGER_8)]; if (compile_options.record_marker == 0) len = sizeof (GFC_INTEGER_4); @@ -3064,18 +3169,22 @@ write_us_marker (st_parameter_dt *dtp, const gfc_offset buf) } else { + uint32_t u32; + uint64_t u64; switch (len) { case sizeof (GFC_INTEGER_4): buf4 = buf; - reverse_memcpy (p, &buf4, sizeof (GFC_INTEGER_4)); - return swrite (dtp->u.p.current_unit->s, p, len); + memcpy (&u32, &buf4, sizeof (u32)); + u32 = __builtin_bswap32 (u32); + return swrite (dtp->u.p.current_unit->s, &u32, len); break; case sizeof (GFC_INTEGER_8): buf8 = buf; - reverse_memcpy (p, &buf8, sizeof (GFC_INTEGER_8)); - return swrite (dtp->u.p.current_unit->s, p, len); + memcpy (&u64, &buf8, sizeof (u64)); + u64 = __builtin_bswap64 (u64); + return swrite (dtp->u.p.current_unit->s, &u64, len); break; default: @@ -3712,22 +3821,6 @@ st_set_nml_var_dim (st_parameter_dt *dtp, GFC_INTEGER_4 n_dim, GFC_DIMENSION_SET(nml->dim[n],lbound,ubound,stride); } -/* Reverse memcpy - used for byte swapping. */ - -void reverse_memcpy (void *dest, const void *src, size_t n) -{ - char *d, *s; - size_t i; - - d = (char *) dest; - s = (char *) src + n - 1; - - /* Write with ascending order - this is likely faster - on modern architectures because of write combining. */ - for (i=0; i