This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch, fortran] Wide character I/O Part 1.2 Round 3


FX wrote:
Hi Jerry,

Sorry I took so long to review your patch. Still not fully OK for me, but we're definitely very close to it.

---snip---


Well the attached patch is significantly simplified once I got a handle on the casts. I have learned some things on this one. New patch is against trunk and includes the test cases.

Regression tested on x86-64 and test cases pass on big endian ppc64. NIST tested. ppc64 regression testing in progress.

As FX stated before, no need for the endian_off scheme. No need for a macro as far as I can see.

OK for trunk?

Jerry
Index: gcc/testsuite/gfortran.dg/widechar_IO_1.f90
===================================================================
--- gcc/testsuite/gfortran.dg/widechar_IO_1.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/widechar_IO_1.f90	(revision 0)
@@ -0,0 +1,20 @@
+! { dg-do run }
+! Wide character I/O test 1, formatted and mixed kind
+! Test case developed by Jerry DeLisle <jvdelisle@gcc.gnu.org>
+program test1
+  integer, parameter :: k4 = 4
+  character(len=10,kind=4) :: wide
+  character(len=10,kind=1) :: thin
+  character(kind=1,len=25) :: buffer
+  wide=k4_"Goodbye!"
+  thin="Hello!"
+  write(buffer, '(a)') wide
+  if (buffer /= "Goodbye!") call abort
+  open(10, form="formatted", access="stream", status="scratch")
+  write(10, '(a)') thin
+  rewind(10)
+  read(10, '(a)') wide
+  if (wide /= k4_"Hello!") call abort
+  write(buffer,*) thin, ">",wide,"<"
+  if (buffer /= " Hello!    >Hello!    <") call abort
+end program test1
Index: gcc/testsuite/gfortran.dg/widechar_IO_2.f90
===================================================================
--- gcc/testsuite/gfortran.dg/widechar_IO_2.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/widechar_IO_2.f90	(revision 0)
@@ -0,0 +1,19 @@
+! { dg-do run }
+! Wide character I/O test 2, formatted array write and read
+! Test case developed by Jerry DeLisle <jvdelisle@gcc.gnu.org>
+program chkdata
+    integer, parameter :: k4=4
+    character(len=7, kind=k4), dimension(3) :: mychar
+    character(50) :: buffer
+    mychar(1) = k4_"abc1234"
+    mychar(2) = k4_"def5678"
+    mychar(3) = k4_"ghi9012"
+    buffer = ""
+    write(buffer,'(3(a))') mychar(2:3), mychar(1)
+    if (buffer /= "def5678ghi9012abc1234") call abort
+    write(buffer,'(3(a))') mychar
+    if (buffer /= "abc1234def5678ghi9012") call abort
+    mychar = ""
+    read(buffer,'(3(a))') mychar
+    if (any(mychar.ne.[ k4_"abc1234",k4_"def5678",k4_"ghi9012" ])) call abort
+end program chkdata
Index: gcc/testsuite/gfortran.dg/widechar_IO_3.f90
===================================================================
--- gcc/testsuite/gfortran.dg/widechar_IO_3.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/widechar_IO_3.f90	(revision 0)
@@ -0,0 +1,23 @@
+! { dg-do run }
+! Wide character I/O test 3, unformatted arrays
+! Test case developed by Jerry DeLisle <jvdelisle@gcc.gnu.org>
+program test1
+  integer, parameter :: k4 = 4
+  character(len=10,kind=4) :: wide
+  character(len=10,kind=4), dimension(5,7) :: widearray
+  wide = k4_"abcdefg"
+  widearray = k4_"1234abcd"
+  open(10, form="unformatted", status="scratch")
+  write(10) wide
+  rewind(10)
+  wide = "wrong"
+  read(10) wide
+  if (wide /= k4_"abcdefg") call abort
+  rewind(10)
+  write(10) widearray(2:4,3:7)
+  widearray(2:4,3:7)=""
+  rewind(10)
+  read(10) widearray(2:4,3:7)
+  close(10)
+  if (any(widearray.ne.k4_"1234abcd")) call abort
+end program test1
Index: gcc/testsuite/gfortran.dg/widechar_IO_4.f90
===================================================================
--- gcc/testsuite/gfortran.dg/widechar_IO_4.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/widechar_IO_4.f90	(revision 0)
@@ -0,0 +1,18 @@
+! { dg-do run }
+! { dg-options -fbackslash }
+! Wide character I/O test 4, formatted ISO-8859-1 characters in string
+! Test case developed by Jerry DeLisle <jvdelisle@gcc.gnu.org>
+! Compile with -fbackslash
+integer, parameter  :: k4 = 4
+character(kind=1,len=15) :: buffer
+character(kind=1, len=1) :: c1, c2
+character(kind=4,len=20) :: str = k4_'X\xF8öABC' ! ISO-8859-1 encoded string
+buffer = ""
+write(buffer,'(3a)')':',trim(str),':'
+if (buffer.ne.':X\xF8öABC: ') call abort
+str = ""
+read(buffer,'(3a)') c1,str(1:6),c2
+if (c1.ne.':') call abort
+if (str.ne.k4_'X\xF8öAB') call abort
+if (c2.ne.'C') call abort
+end
Index: gcc/fortran/gfortran.texi
===================================================================
--- gcc/fortran/gfortran.texi	(revision 136553)
+++ gcc/fortran/gfortran.texi	(working copy)
@@ -525,7 +525,7 @@ support is reported in the @ref{Fortran 
 documentation.
 
 The next version of the Fortran standard after Fortran 2003 is currently
-being developped and the GNU Fortran compiler supports some of its new
+being developed and the GNU Fortran compiler supports some of its new
 features. This support is based on the latest draft of the standard
 (available from @url{http://www.nag.co.uk/sc22wg5/}) and no guarantee of
 future compatibility is made, as the final standard might differ from the
Index: gcc/fortran/trans-io.c
===================================================================
--- gcc/fortran/trans-io.c	(revision 136553)
+++ gcc/fortran/trans-io.c	(working copy)
@@ -121,6 +121,7 @@ enum iocall
   IOCALL_X_INTEGER,
   IOCALL_X_LOGICAL,
   IOCALL_X_CHARACTER,
+  IOCALL_X_CHARACTER_WIDE,
   IOCALL_X_REAL,
   IOCALL_X_COMPLEX,
   IOCALL_X_ARRAY,
@@ -327,6 +328,13 @@ gfc_build_io_library_fndecls (void)
 				     void_type_node, 3, dt_parm_type,
 				     pvoid_type_node, gfc_int4_type_node);
 
+  iocall[IOCALL_X_CHARACTER_WIDE] =
+    gfc_build_library_function_decl (get_identifier
+				     (PREFIX("transfer_character_wide")),
+				     void_type_node, 4, dt_parm_type,
+				     pvoid_type_node, gfc_charlen_type_node,
+				     gfc_int4_type_node);
+
   iocall[IOCALL_X_REAL] =
     gfc_build_library_function_decl (get_identifier (PREFIX("transfer_real")),
 				     void_type_node, 3, dt_parm_type,
@@ -1977,7 +1985,7 @@ transfer_array_component (tree expr, gfc
 static void
 transfer_expr (gfc_se * se, gfc_typespec * ts, tree addr_expr, gfc_code * code)
 {
-  tree tmp, function, arg2, field, expr;
+  tree tmp, function, arg2, arg3, field, expr;
   gfc_component *c;
   int kind;
 
@@ -2009,6 +2017,7 @@ transfer_expr (gfc_se * se, gfc_typespec
   kind = ts->kind;
   function = NULL;
   arg2 = NULL;
+  arg3 = NULL;
 
   switch (ts->type)
     {
@@ -2033,6 +2042,26 @@ transfer_expr (gfc_se * se, gfc_typespec
       break;
 
     case BT_CHARACTER:
+      if (kind == 4)
+	{
+	  if (se->string_length)
+	    arg2 = se->string_length;
+	  else
+	    {
+	      tmp = build_fold_indirect_ref (addr_expr);
+	      gcc_assert (TREE_CODE (TREE_TYPE (tmp)) == ARRAY_TYPE);
+	      arg2 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (tmp)));
+	      arg2 = fold_convert (gfc_charlen_type_node, arg2);
+	    }
+	  arg3 = build_int_cst (NULL_TREE, kind);
+	  function = iocall[IOCALL_X_CHARACTER_WIDE];
+	  tmp = build_fold_addr_expr (dt_parm);
+	  tmp = build_call_expr (function, 4, tmp, addr_expr, arg2, arg3);
+	  gfc_add_expr_to_block (&se->pre, tmp);
+	  gfc_add_block_to_block (&se->pre, &se->post);
+	  return;
+	}
+      /* Fall through. */
     case BT_HOLLERITH:
       if (se->string_length)
 	arg2 = se->string_length;
Index: libgfortran/runtime/main.c
===================================================================
--- libgfortran/runtime/main.c	(revision 136553)
+++ libgfortran/runtime/main.c	(working copy)
@@ -45,11 +45,9 @@ stupid_function_name_for_static_linking 
   return;
 }
 
-/* This is the offset (in bytes) required to cast from logical(8)* to
-   logical(4)*. and still get the same result.  Will be 0 for little-endian
-   machines and 4 for big-endian machines.  */
-int l8_to_l4_offset = 0;
-
+/* This will be 0 for little-endian
+   machines and 1 for big-endian machines.  */
+int big_endian = 0;
 
 /* Figure out endianness for this machine.  */
 
@@ -64,9 +62,9 @@ determine_endianness (void)
 
   u.l8 = 1;
   if (u.l4[0])
-    l8_to_l4_offset = 0;
+    big_endian = 0;
   else if (u.l4[1])
-    l8_to_l4_offset = 1;
+    big_endian = 1;
   else
     runtime_error ("Unable to determine machine endianness");
 }
Index: libgfortran/gfortran.map
===================================================================
--- libgfortran/gfortran.map	(revision 136553)
+++ libgfortran/gfortran.map	(working copy)
@@ -1083,6 +1083,7 @@ GFORTRAN_1.1 {
     _gfortran_string_trim_char4;
     _gfortran_string_verify_char4;
     _gfortran_st_wait;
+    _gfortran_transfer_character_wide;
     _gfortran_transpose_char4;
     _gfortran_unpack0_char4;
     _gfortran_unpack1_char4;
Index: libgfortran/libgfortran.h
===================================================================
--- libgfortran/libgfortran.h	(revision 136553)
+++ libgfortran/libgfortran.h	(working copy)
@@ -272,13 +272,12 @@ typedef GFC_UINTEGER_4 gfc_char4_t;
    simply equal to the kind parameter itself.  */
 #define GFC_SIZE_OF_CHAR_KIND(kind) (kind)
 
-
 /* This will be 0 on little-endian machines and one on big-endian machines.  */
-extern int l8_to_l4_offset;
-internal_proto(l8_to_l4_offset);
+extern int big_endian;
+internal_proto(big_endian);
 
 #define GFOR_POINTER_TO_L1(p, kind) \
-  (l8_to_l4_offset * (kind - 1) + (GFC_LOGICAL_1 *)(p))
+  (big_endian * (kind - 1) + (GFC_LOGICAL_1 *)(p))
 
 #define GFC_INTEGER_1_HUGE \
   (GFC_INTEGER_1)((((GFC_UINTEGER_1)1) << 7) - 1)
Index: libgfortran/io/open.c
===================================================================
--- libgfortran/io/open.c	(revision 136553)
+++ libgfortran/io/open.c	(working copy)
@@ -107,7 +107,7 @@ static const st_option decimal_opt[] =
 
 static const st_option encoding_opt[] =
 {
-  /* TODO { "utf-8", ENCODING_UTF8}, */
+  { "utf-8", ENCODING_UTF8},
   { "default", ENCODING_DEFAULT},
   { NULL, 0}
 };
@@ -795,7 +795,7 @@ st_open (st_parameter_open *opp)
 	conv = compile_options.convert;
     }
   
-  /* We use l8_to_l4_offset, which is 0 on little-endian machines
+  /* We use big_endian, which is 0 on little-endian machines
      and 1 on big-endian machines.  */
   switch (conv)
     {
@@ -804,11 +804,11 @@ st_open (st_parameter_open *opp)
       break;
       
     case GFC_CONVERT_BIG:
-      conv = l8_to_l4_offset ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
+      conv = big_endian ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
       break;
       
     case GFC_CONVERT_LITTLE:
-      conv = l8_to_l4_offset ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
+      conv = big_endian ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
       break;
       
     default:
Index: libgfortran/io/list_read.c
===================================================================
--- libgfortran/io/list_read.c	(revision 136553)
+++ libgfortran/io/list_read.c	(working copy)
@@ -1728,7 +1728,8 @@ list_formatted_read_scalar (st_parameter
 			    int kind, size_t size)
 {
   char c;
-  int m;
+  gfc_char4_t *q;
+  int i, m;
   jmp_buf eof_jump;
 
   dtp->u.p.namelist_mode = 0;
@@ -1831,17 +1832,33 @@ list_formatted_read_scalar (st_parameter
 
     case BT_CHARACTER:
       if (dtp->u.p.saved_string)
-       {
+	{
 	  m = ((int) size < dtp->u.p.saved_used)
 	      ? (int) size : dtp->u.p.saved_used;
-	  memcpy (p, dtp->u.p.saved_string, m);
-       }
+	  if (kind == 1)
+	    memcpy (p, dtp->u.p.saved_string, m);
+	  else
+	    {
+	      q = (gfc_char4_t *) p;
+	      for (i = 0; i < m; i++)
+		q[i] = (unsigned char) dtp->u.p.saved_string[i];
+	    }
+	}
       else
 	/* Just delimiters encountered, nothing to copy but SPACE.  */
         m = 0;
 
       if (m < (int) size)
-	memset (((char *) p) + m, ' ', size - m);
+	{
+	  if (kind == 1)
+	    memset (((char *) p) + m, ' ', size - m);
+	  else
+	    {
+	      q = (gfc_char4_t *) p;
+	      for (i = m; i < (int) size; i++)
+		q[i] = (unsigned char) ' ';
+	    }
+	}
       break;
 
     case BT_NULL:
@@ -1862,6 +1879,8 @@ list_formatted_read (st_parameter_dt *dt
 {
   size_t elem;
   char *tmp;
+  size_t stride = type == BT_CHARACTER ?
+		  size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
 
   tmp = (char *) p;
 
@@ -1869,7 +1888,7 @@ list_formatted_read (st_parameter_dt *dt
   for (elem = 0; elem < nelems; elem++)
     {
       dtp->u.p.item_count++;
-      list_formatted_read_scalar (dtp, type, tmp + size*elem, kind, size);
+      list_formatted_read_scalar (dtp, type, tmp + stride*elem, kind, size);
     }
 }
 
Index: libgfortran/io/read.c
===================================================================
--- libgfortran/io/read.c	(revision 136553)
+++ libgfortran/io/read.c	(working copy)
@@ -270,6 +270,43 @@ read_a (st_parameter_dt *dtp, const fnod
     memset (p + m, ' ', n);
 }
 
+void
+read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
+{
+  char *s;
+  gfc_char4_t *dest;
+  int m, n, wi, status;
+  size_t w;
+
+  wi = f->u.w;
+  if (wi == -1) /* '(A)' edit descriptor  */
+    wi = length;
+
+  w = wi;
+
+  s = gfc_alloca (w);
+
+  /* Read in w bytes, treating comma as not a separator.  */
+  dtp->u.p.sf_read_comma = 0;
+  status = read_block_form (dtp, s, &w);
+  dtp->u.p.sf_read_comma =
+    dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
+  
+  if (status == FAILURE)
+    return;
+  if (w > (size_t) length)
+     s += (w - length);
+
+  m = ((int) w > length) ? length : (int) w;
+  
+  dest = (gfc_char4_t *) p;
+  
+  for (n = 0; n < m; n++, dest++, s++)
+    *dest = (unsigned char ) *s;
+
+  for (n = 0; n < length - (int) w; n++, dest++)
+    *dest = (unsigned char) ' ';
+}
 
 /* eat_leading_spaces()-- Given a character pointer and a width,
  * ignore the leading spaces.  */
Index: libgfortran/io/inquire.c
===================================================================
--- libgfortran/io/inquire.c	(revision 136553)
+++ libgfortran/io/inquire.c	(working copy)
@@ -268,10 +268,9 @@ inquire_via_unit (st_parameter_inquire *
 	  case ENCODING_DEFAULT:
 	    p = "UNKNOWN";
 	    break;
-	  /* TODO: Enable UTF-8 case here when implemented.
 	  case ENCODING_UTF8:
 	    p = "UTF-8";
-	    break; */
+	    break;
 	  default:
 	    internal_error (&iqp->common, "inquire_via_unit(): Bad encoding");
 	  }
@@ -497,13 +496,13 @@ inquire_via_unit (st_parameter_inquire *
       else
 	switch (u->flags.convert)
 	  {
-	    /*  l8_to_l4_offset is 0 for little-endian, 1 for big-endian.  */
+	    /*  big_endian is 0 for little-endian, 1 for big-endian.  */
 	  case GFC_CONVERT_NATIVE:
-	    p = l8_to_l4_offset ? "BIG_ENDIAN" : "LITTLE_ENDIAN";
+	    p = big_endian ? "BIG_ENDIAN" : "LITTLE_ENDIAN";
 	    break;
 
 	  case GFC_CONVERT_SWAP:
-	    p = l8_to_l4_offset ? "LITTLE_ENDIAN" : "BIG_ENDIAN";
+	    p = big_endian ? "LITTLE_ENDIAN" : "BIG_ENDIAN";
 	    break;
 
 	  default:
Index: libgfortran/io/io.h
===================================================================
--- libgfortran/io/io.h	(revision 136553)
+++ libgfortran/io/io.h	(working copy)
@@ -869,6 +869,9 @@ internal_proto(convert_real);
 extern void read_a (st_parameter_dt *, const fnode *, char *, int);
 internal_proto(read_a);
 
+extern void read_a_char4 (st_parameter_dt *, const fnode *, char *, int);
+internal_proto(read_a_char4);
+
 extern void read_f (st_parameter_dt *, const fnode *, char *, int);
 internal_proto(read_f);
 
@@ -904,6 +907,9 @@ internal_proto(namelist_write);
 extern void write_a (st_parameter_dt *, const fnode *, const char *, int);
 internal_proto(write_a);
 
+extern void write_a_char4 (st_parameter_dt *, const fnode *, const char *, int);
+internal_proto(write_a_char4);
+
 extern void write_b (st_parameter_dt *, const fnode *, const char *, int);
 internal_proto(write_b);
 
Index: libgfortran/io/fbuf.c
===================================================================
--- libgfortran/io/fbuf.c	(revision 136553)
+++ libgfortran/io/fbuf.c	(working copy)
@@ -157,7 +157,7 @@ fbuf_seek (gfc_unit * u, gfc_offset off)
   /* Moving to the left past the flushed marked would imply moving past
      the left tab limit, which is never allowed. So return error if
      that is attempted.  */
-  if (pos < u->fbuf->flushed)
+  if (pos < (gfc_offset) u->fbuf->flushed)
     return -1;
   u->fbuf->pos = pos;
   return 0;
Index: libgfortran/io/transfer.c
===================================================================
--- libgfortran/io/transfer.c	(revision 136553)
+++ libgfortran/io/transfer.c	(working copy)
@@ -54,6 +54,7 @@ Boston, MA 02110-1301, USA.  */
       transfer_integer
       transfer_logical
       transfer_character
+      transfer_character_wide
       transfer_real
       transfer_complex
 
@@ -76,6 +77,9 @@ export_proto(transfer_logical);
 extern void transfer_character (st_parameter_dt *, void *, int);
 export_proto(transfer_character);
 
+extern void transfer_character_wide (st_parameter_dt *, void *, int, int);
+export_proto(transfer_character_wide);
+
 extern void transfer_complex (st_parameter_dt *, void *, int);
 export_proto(transfer_complex);
 
@@ -730,35 +734,43 @@ write_buf (st_parameter_dt *dtp, void *b
 
 static void
 unformatted_read (st_parameter_dt *dtp, bt type,
-		  void *dest, int kind __attribute__((unused)),
-		  size_t size, size_t nelems)
+		  void *dest, int kind, size_t size, size_t nelems)
 {
   size_t i, sz;
 
-  /* Currently, character implies size=1.  */
   if (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_NATIVE
-      || size == 1 || type == BT_CHARACTER)
+      || size == 1)
     {
       sz = size * nelems;
+      if (type == BT_CHARACTER)
+	sz *= GFC_SIZE_OF_CHAR_KIND(kind);
       read_block_direct (dtp, dest, &sz);
     }
   else
     {
       char buffer[16];
       char *p;
-      
+
+      p = dest;
+
+      /* Handle wide characters.  */
+      if (type == BT_CHARACTER && kind != 1)
+	{
+	  nelems *= size;
+	  size = kind;
+	}
+
       /* Break up complex into its constituent reals.  */
       if (type == BT_COMPLEX)
 	{
 	  nelems *= 2;
 	  size /= 2;
 	}
-      p = dest;
       
       /* By now, all complex variables have been split into their
 	 constituent reals.  */
       
-      for (i=0; i<nelems; i++)
+      for (i = 0; i < nelems; i++)
 	{
  	  read_block_direct (dtp, buffer, &size);
  	  reverse_memcpy (p, buffer, size);
@@ -775,20 +787,30 @@ unformatted_read (st_parameter_dt *dtp, 
 
 static void
 unformatted_write (st_parameter_dt *dtp, bt type,
-		   void *source, int kind __attribute__((unused)),
-		   size_t size, size_t nelems)
+		   void *source, int kind, size_t size, size_t nelems)
 {
   if (dtp->u.p.current_unit->flags.convert == GFC_CONVERT_NATIVE ||
-      size == 1 || type == BT_CHARACTER)
+      size == 1)
     {
-      size *= nelems;
-      write_buf (dtp, source, size);
+      size_t stride = type == BT_CHARACTER ?
+		  size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
+
+      write_buf (dtp, source, stride * nelems);
     }
   else
     {
       char buffer[16];
       char *p;
       size_t i;
+
+      p = source;
+
+      /* Handle wide characters.  */
+      if (type == BT_CHARACTER && kind != 1)
+	{
+	  nelems *= size;
+	  size = kind;
+	}
   
       /* Break up complex into its constituent reals.  */
       if (type == BT_COMPLEX)
@@ -797,16 +819,13 @@ unformatted_write (st_parameter_dt *dtp,
 	  size /= 2;
 	}      
 
-      p = source;
-
       /* By now, all complex variables have been split into their
 	 constituent reals.  */
 
-
-      for (i=0; i<nelems; i++)
+      for (i = 0; i < nelems; i++)
 	{
 	  reverse_memcpy(buffer, p, size);
- 	  p+= size;
+ 	  p += size;
 	  write_buf (dtp, buffer, size);
 	}
     }
@@ -904,7 +923,7 @@ require_type (st_parameter_dt *dtp, bt e
    of the next element, then comes back here to process it.  */
 
 static void
-formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int len,
+formatted_transfer_scalar (st_parameter_dt *dtp, bt type, void *p, int kind,
 			   size_t size)
 {
   char scratch[SCRATCH_SIZE];
@@ -1004,9 +1023,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_decimal (dtp, f, p, len);
+	    read_decimal (dtp, f, p, kind);
 	  else
-	    write_i (dtp, f, p, len);
+	    write_i (dtp, f, p, kind);
 
 	  break;
 
@@ -1019,9 +1038,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_radix (dtp, f, p, len, 2);
+	    read_radix (dtp, f, p, kind, 2);
 	  else
-	    write_b (dtp, f, p, len);
+	    write_b (dtp, f, p, kind);
 
 	  break;
 
@@ -1034,9 +1053,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_radix (dtp, f, p, len, 8);
+	    read_radix (dtp, f, p, kind, 8);
 	  else
-	    write_o (dtp, f, p, len);
+	    write_o (dtp, f, p, kind);
 
 	  break;
 
@@ -1049,9 +1068,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_radix (dtp, f, p, len, 16);
+	    read_radix (dtp, f, p, kind, 16);
 	  else
-	    write_z (dtp, f, p, len);
+	    write_z (dtp, f, p, kind);
 
 	  break;
 
@@ -1059,11 +1078,23 @@ formatted_transfer_scalar (st_parameter_
 	  if (n == 0)
 	    goto need_data;
 
+	  /* It is possible to have FMT_A with something not BT_CHARACTER such
+	     as when writing out hollerith strings, so check both type
+	     and kind before calling wide character routines.  */
 	  if (dtp->u.p.mode == READING)
-	    read_a (dtp, f, p, len);
+	    {
+	      if (type == BT_CHARACTER && kind == 4)
+		read_a_char4 (dtp, f, p, size);
+	      else
+		read_a (dtp, f, p, size);
+	    }
 	  else
-	    write_a (dtp, f, p, len);
-
+	    {
+	      if (type == BT_CHARACTER && kind == 4)
+		write_a_char4 (dtp, f, p, size);
+	      else
+		write_a (dtp, f, p, size);
+	    }
 	  break;
 
 	case FMT_L:
@@ -1071,9 +1102,9 @@ formatted_transfer_scalar (st_parameter_
 	    goto need_data;
 
 	  if (dtp->u.p.mode == READING)
-	    read_l (dtp, f, p, len);
+	    read_l (dtp, f, p, kind);
 	  else
-	    write_l (dtp, f, p, len);
+	    write_l (dtp, f, p, kind);
 
 	  break;
 
@@ -1084,9 +1115,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_f (dtp, f, p, len);
+	    read_f (dtp, f, p, kind);
 	  else
-	    write_d (dtp, f, p, len);
+	    write_d (dtp, f, p, kind);
 
 	  break;
 
@@ -1097,9 +1128,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_f (dtp, f, p, len);
+	    read_f (dtp, f, p, kind);
 	  else
-	    write_e (dtp, f, p, len);
+	    write_e (dtp, f, p, kind);
 	  break;
 
 	case FMT_EN:
@@ -1109,9 +1140,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_f (dtp, f, p, len);
+	    read_f (dtp, f, p, kind);
 	  else
-	    write_en (dtp, f, p, len);
+	    write_en (dtp, f, p, kind);
 
 	  break;
 
@@ -1122,9 +1153,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_f (dtp, f, p, len);
+	    read_f (dtp, f, p, kind);
 	  else
-	    write_es (dtp, f, p, len);
+	    write_es (dtp, f, p, kind);
 
 	  break;
 
@@ -1135,9 +1166,9 @@ formatted_transfer_scalar (st_parameter_
 	    return;
 
 	  if (dtp->u.p.mode == READING)
-	    read_f (dtp, f, p, len);
+	    read_f (dtp, f, p, kind);
 	  else
-	    write_f (dtp, f, p, len);
+	    write_f (dtp, f, p, kind);
 
 	  break;
 
@@ -1148,16 +1179,19 @@ formatted_transfer_scalar (st_parameter_
 	    switch (type)
 	      {
 	      case BT_INTEGER:
-		read_decimal (dtp, f, p, len);
+		read_decimal (dtp, f, p, kind);
 		break;
 	      case BT_LOGICAL:
-		read_l (dtp, f, p, len);
+		read_l (dtp, f, p, kind);
 		break;
 	      case BT_CHARACTER:
-		read_a (dtp, f, p, len);
+		if (kind == 4)
+		  read_a_char4 (dtp, f, p, size);
+		else
+		  read_a (dtp, f, p, size);
 		break;
 	      case BT_REAL:
-		read_f (dtp, f, p, len);
+		read_f (dtp, f, p, kind);
 		break;
 	      default:
 		goto bad_type;
@@ -1166,19 +1200,22 @@ formatted_transfer_scalar (st_parameter_
 	    switch (type)
 	      {
 	      case BT_INTEGER:
-		write_i (dtp, f, p, len);
+		write_i (dtp, f, p, kind);
 		break;
 	      case BT_LOGICAL:
-		write_l (dtp, f, p, len);
+		write_l (dtp, f, p, kind);	
 		break;
 	      case BT_CHARACTER:
-		write_a (dtp, f, p, len);
+		if (kind == 4)
+		  write_a_char4 (dtp, f, p, size);
+		else
+		  write_a (dtp, f, p, size);
 		break;
 	      case BT_REAL:
 		if (f->u.real.w == 0)
-		  write_real (dtp, p, len);
+		  write_real (dtp, p, kind);
 		else
-		  write_d (dtp, f, p, len);
+		  write_d (dtp, f, p, kind);
 		break;
 	      default:
 	      bad_type:
@@ -1407,12 +1444,13 @@ formatted_transfer (st_parameter_dt *dtp
   char *tmp;
 
   tmp = (char *) p;
-
+  size_t stride = type == BT_CHARACTER ?
+		  size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
   /* Big loop over all the elements.  */
   for (elem = 0; elem < nelems; elem++)
     {
       dtp->u.p.item_count++;
-      formatted_transfer_scalar (dtp, type, tmp + size*elem, kind, size);
+      formatted_transfer_scalar (dtp, type, tmp + stride*elem, kind, size);
     }
 }
 
@@ -1465,10 +1503,26 @@ transfer_character (st_parameter_dt *dtp
   if (len == 0 && p == NULL)
     p = empty_string;
 
-  /* Currently we support only 1 byte chars, and the library is a bit
-     confused of character kind vs. length, so we kludge it by setting
-     kind = length.  */
-  dtp->u.p.transfer (dtp, BT_CHARACTER, p, len, len, 1);
+  /* Set kind here to 1.  */
+  dtp->u.p.transfer (dtp, BT_CHARACTER, p, 1, len, 1);
+}
+
+void
+transfer_character_wide (st_parameter_dt *dtp, void *p, int len, int kind)
+{
+  static char *empty_string[0];
+
+  if ((dtp->common.flags & IOPARM_LIBRETURN_MASK) != IOPARM_LIBRETURN_OK)
+    return;
+
+  /* Strings of zero length can have p == NULL, which confuses the
+     transfer routines into thinking we need more data elements.  To avoid
+     this, we give them a nice pointer.  */
+  if (len == 0 && p == NULL)
+    p = empty_string;
+
+  /* Here we pass the actual kind value.  */
+  dtp->u.p.transfer (dtp, BT_CHARACTER, p, kind, len, 1);
 }
 
 
@@ -1522,13 +1576,7 @@ transfer_array (st_parameter_dt *dtp, gf
       break;
     case GFC_DTYPE_CHARACTER:
       iotype = BT_CHARACTER;
-      /* FIXME: Currently dtype contains the charlen, which is
-	 clobbered if charlen > 2**24. That's why we use a separate
-	 argument for the charlen. However, if we want to support
-	 non-8-bit charsets we need to fix dtype to contain
-	 sizeof(chartype) and fix the code below.  */
       size = charlen;
-      kind = charlen;
       break;
     case GFC_DTYPE_DERIVED:
       internal_error (&dtp->common,
@@ -1542,7 +1590,9 @@ transfer_array (st_parameter_dt *dtp, gf
   for (n = 0; n < rank; n++)
     {
       count[n] = 0;
-      stride[n] = desc->dim[n].stride;
+      stride[n] = iotype == BT_CHARACTER ?
+		  desc->dim[n].stride * GFC_SIZE_OF_CHAR_KIND(kind) :
+		  desc->dim[n].stride;
       extent[n] = desc->dim[n].ubound + 1 - desc->dim[n].lbound;
 
       /* If the extent of even one dimension is zero, then the entire
@@ -1815,7 +1865,7 @@ data_transfer_init (st_parameter_dt *dtp
      if (conv == GFC_CONVERT_NONE)
        conv = compile_options.convert;
 
-     /* We use l8_to_l4_offset, which is 0 on little-endian machines
+     /* We use big_endian, which is 0 on little-endian machines
 	and 1 on big-endian machines.  */
      switch (conv)
        {
@@ -1824,11 +1874,11 @@ data_transfer_init (st_parameter_dt *dtp
 	 break;
 	 
        case GFC_CONVERT_BIG:
-	 conv = l8_to_l4_offset ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
+	 conv = big_endian ? GFC_CONVERT_NATIVE : GFC_CONVERT_SWAP;
 	 break;
       
        case GFC_CONVERT_LITTLE:
-	 conv = l8_to_l4_offset ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
+	 conv = big_endian ? GFC_CONVERT_SWAP : GFC_CONVERT_NATIVE;
 	 break;
 	 
        default:
Index: libgfortran/io/write.c
===================================================================
--- libgfortran/io/write.c	(revision 136553)
+++ libgfortran/io/write.c	(working copy)
@@ -124,6 +124,108 @@ write_a (st_parameter_dt *dtp, const fno
 #endif
 }
 
+
+/* write_a_char4 is the kind=4 counterpart of write_a: SOURCE points to LEN
+   4-byte characters (gfc_char4_t), which are read as whole values so that
+   endianness does not matter.  This currently implements encoding="default":
+   each character is written as one byte, and a character that does not fit
+   in one byte (value > 255) is written as '?'.  TODO: Implement
+   encoding="UTF-8", which will process all 4 bytes of each character.  */
+
+void
+write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
+{
+  int wlen;
+  char *p;
+  gfc_char4_t *q;
+
+  wlen = f->u.string.length < 0
+	 || (f->format == FMT_G && f->u.string.length == 0)
+	 ? len : f->u.string.length;
+
+  q = (gfc_char4_t *) source;
+#ifdef HAVE_CRLF
+  /* If this is formatted STREAM IO convert any embedded line feed characters
+     to CR_LF on systems that use that sequence for newlines.  See F2003
+     Standard sections 10.6.3 and 9.9 for further information.  */
+  if (is_stream_io (dtp))
+    {
+      const char crlf[] = "\r\n";
+      int i, j, bytes = 0;
+      gfc_char4_t *qq;
+
+      /* Write out any padding if needed.  */
+      if (len < wlen)
+	{
+	  p = write_block (dtp, wlen - len);
+	  if (p == NULL)
+	    return;
+	  memset (p, ' ', wlen - len);
+	}
+
+      /* Scan the source string looking for '\n' and convert it if found.
+	 Only min (len, wlen) characters are taken from the source.  */
+      qq = (gfc_char4_t *) source;
+      for (i = 0; i < (len < wlen ? len : wlen); i++)
+	{
+	  if (qq[i] == '\n')
+	    {
+	      /* Write out the previously scanned characters in the string.  */
+	      if (bytes > 0)
+		{
+		  p = write_block (dtp, bytes);
+		  if (p == NULL)
+		    return;
+		  q = qq + i - bytes;
+		  for (j = 0; j < bytes; j++)
+		    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
+		  bytes = 0;
+		}
+
+	      /* Write out the CR_LF sequence.  */
+	      p = write_block (dtp, 2);
+	      if (p == NULL)
+		return;
+	      memcpy (p, crlf, 2);
+	    }
+	  else
+	    bytes++;
+	}
+
+      /*  Write out any remaining bytes if no LF was found.  */
+      if (bytes > 0)
+	{
+	  p = write_block (dtp, bytes);
+	  if (p == NULL)
+	    return;
+	  q = qq + i - bytes;
+	  for (j = 0; j < bytes; j++)
+	    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
+	}
+    }
+  else
+    {
+#endif
+      int j;
+      p = write_block (dtp, wlen);
+      if (p == NULL)
+	return;
+
+      /* Blank-pad on the left when the field is wider than the string.  */
+      if (wlen > len)
+	{
+	  memset (p, ' ', wlen - len);
+	  p += wlen - len;
+	  wlen = len;
+	}
+      for (j = 0; j < wlen; j++)
+	p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
+#ifdef HAVE_CRLF
+    }
+#endif
+}
+
+
 static GFC_INTEGER_LARGEST
 extract_int (const void *p, int len)
 {
@@ -639,10 +741,12 @@ write_integer (st_parameter_dt *dtp, con
    the strings if the file has been opened in that mode.  */
 
 static void
-write_character (st_parameter_dt *dtp, const char *source, int length)
+write_character (st_parameter_dt *dtp, const char *source, int kind, int length)
 {
   int i, extra;
   char *p, d;
+  gfc_char4_t *q;
+
 
   switch (dtp->u.p.delim_status)
     {
@@ -657,35 +761,77 @@ write_character (st_parameter_dt *dtp, c
       break;
     }
 
-  if (d == ' ')
-    extra = 0;
-  else
+  if (kind == 1)
     {
-      extra = 2;
+      if (d == ' ')
+	extra = 0;
+      else
+	{
+	  extra = 2;
 
-      for (i = 0; i < length; i++)
-	if (source[i] == d)
-	  extra++;
-    }
+	    for (i = 0; i < length; i++)
+	      if (source[i] == d)
+		extra++;
+	}
 
-  p = write_block (dtp, length + extra);
-  if (p == NULL)
-    return;
+      p = write_block (dtp, length + extra);
+      if (p == NULL)
+	return;
+
+      if (d == ' ')
+	memcpy (p, source, length);
+      else
+	{
+	  *p++ = d;
 
-  if (d == ' ')
-    memcpy (p, source, length);
+	  for (i = 0; i < length; i++)
+            {
+              *p++ = source[i];
+              if (source[i] == d)
+		*p++ = d;
+	    }
+
+	  *p = d;
+	}
+    }
   else
     {
-      *p++ = d;
-
-      for (i = 0; i < length; i++)
+      /* We have to scan the source string looking for delimiters to determine
+	 how large the write block needs to be.  */
+      if (d == ' ')
+	extra = 0;
+      else
 	{
-	  *p++ = source[i];
-	  if (source[i] == d)
-	    *p++ = d;
+	  extra = 2;
+
+	  q = (gfc_char4_t *) source;
+	  for (i = 0; i < length; i++, q++)
+	    if (*q == (gfc_char4_t) d)
+	      extra++;
 	}
 
-      *p = d;
+      p = write_block (dtp, length + extra);
+      if (p == NULL)
+	return;
+
+      if (d == ' ')
+	{
+	  q = (gfc_char4_t *) source;
+	  for (i = 0; i < length; i++, q++)
+	    p[i] = *q > 255 ? '?' : (unsigned char) *q;
+	}
+      else
+	{
+	  *p++ = d;
+	  q = (gfc_char4_t *) source;
+	  for (i = 0; i < length; i++, q++)
+	    {
+	      *p++ = *q > 255 ? '?' : (unsigned char) *q;
+	      if (*q == (gfc_char4_t) d)
+		*p++ = d;
+	    }
+	  *p = d;
+	}
     }
 }
 
@@ -796,7 +942,7 @@ list_formatted_write_scalar (st_paramete
       write_logical (dtp, p, kind);
       break;
     case BT_CHARACTER:
-      write_character (dtp, p, kind);
+      write_character (dtp, p, kind, size);
       break;
     case BT_REAL:
       write_real (dtp, p, kind);
@@ -818,6 +964,8 @@ list_formatted_write (st_parameter_dt *d
 {
   size_t elem;
   char *tmp;
+  size_t stride = type == BT_CHARACTER ?
+		  size * GFC_SIZE_OF_CHAR_KIND(kind) : size;
 
   tmp = (char *) p;
 
@@ -825,7 +973,7 @@ list_formatted_write (st_parameter_dt *d
   for (elem = 0; elem < nelems; elem++)
     {
       dtp->u.p.item_count++;
-      list_formatted_write_scalar (dtp, type, tmp + size*elem, kind, size);
+      list_formatted_write_scalar (dtp, type, tmp + elem * stride, kind, size);
     }
 }
 
@@ -889,9 +1037,9 @@ nml_write_obj (st_parameter_dt *dtp, nam
   if (obj->type != GFC_DTYPE_DERIVED)
     {
 #ifdef HAVE_CRLF
-      write_character (dtp, "\r\n ", 3);
+      write_character (dtp, "\r\n ", 1, 3);
 #else
-      write_character (dtp, "\n ", 2);
+      write_character (dtp, "\n ", 1, 2);
 #endif
       len = 0;
       if (base)
@@ -900,15 +1048,15 @@ nml_write_obj (st_parameter_dt *dtp, nam
 	  for (dim_i = 0; dim_i < (index_type) strlen (base_name); dim_i++)
             {
 	      cup = toupper (base_name[dim_i]);
-	      write_character (dtp, &cup, 1);
+	      write_character (dtp, &cup, 1, 1);
             }
 	}
       for (dim_i =len; dim_i < (index_type) strlen (obj->var_name); dim_i++)
 	{
 	  cup = toupper (obj->var_name[dim_i]);
-	  write_character (dtp, &cup, 1);
+	  write_character (dtp, &cup, 1, 1);
 	}
-      write_character (dtp, "=", 1);
+      write_character (dtp, "=", 1, 1);
     }
 
   /* Counts the number of data output on a line, including names.  */
@@ -978,7 +1126,7 @@ nml_write_obj (st_parameter_dt *dtp, nam
 	  if (rep_ctr > 1)
 	    {
 	      sprintf(rep_buff, " %d*", rep_ctr);
-	      write_character (dtp, rep_buff, strlen (rep_buff));
+	      write_character (dtp, rep_buff, 1, strlen (rep_buff));
 	      dtp->u.p.no_leading_blank = 1;
 	    }
 	  num++;
@@ -1003,7 +1151,7 @@ nml_write_obj (st_parameter_dt *dtp, nam
 		dtp->u.p.delim_status = DELIM_QUOTE;
 	      if (dtp->u.p.nml_delim == '\'')
 		dtp->u.p.delim_status = DELIM_APOSTROPHE;
-	      write_character (dtp, p, obj->string_length);
+	      write_character (dtp, p, 1, obj->string_length);
 	      dtp->u.p.delim_status = tmp_delim;
               break;
 
@@ -1093,14 +1241,14 @@ nml_write_obj (st_parameter_dt *dtp, nam
 	     to column 2. Reset the repeat counter.  */
 
 	  dtp->u.p.no_leading_blank = 0;
-	  write_character (dtp, &semi_comma, 1);
+	  write_character (dtp, &semi_comma, 1, 1);
 	  if (num > 5)
 	    {
 	      num = 0;
 #ifdef HAVE_CRLF
-	      write_character (dtp, "\r\n ", 3);
+	      write_character (dtp, "\r\n ", 1, 3);
 #else
-	      write_character (dtp, "\n ", 2);
+	      write_character (dtp, "\n ", 1, 2);
 #endif
 	    }
 	  rep_ctr = 1;
@@ -1164,13 +1312,13 @@ namelist_write (st_parameter_dt *dtp)
   /* Temporarily disable namelist delimters.  */
   dtp->u.p.delim_status = DELIM_NONE;
 
-  write_character (dtp, "&", 1);
+  write_character (dtp, "&", 1, 1);
 
   /* Write namelist name in upper case - f95 std.  */
   for (i = 0 ;i < dtp->namelist_name_len ;i++ )
     {
       c = toupper (dtp->namelist_name[i]);
-      write_character (dtp, &c ,1);
+      write_character (dtp, &c, 1 ,1);
     }
 
   if (dtp->u.p.ionml != NULL)
@@ -1184,9 +1332,9 @@ namelist_write (st_parameter_dt *dtp)
     }
 
 #ifdef HAVE_CRLF
-  write_character (dtp, "  /\r\n", 5);
+  write_character (dtp, "  /\r\n", 1, 5);
 #else
-  write_character (dtp, "  /\n", 4);
+  write_character (dtp, "  /\n", 1, 4);
 #endif
 
   /* Restore the original delimiter.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]