This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

String case (was: Re: Accepts invalid?)


As for converting case in Fortran, and the example given by Daniel
Franke, there are at least three other approaches:

1. roll your own version of INDEX exploiting the fact that the
lowercase/uppercase strings are sorted;
2. Use a giant SELECT statement
    SELECT CASE(in(i:i))
    CASE('A')
      out(i:i) = 'a'
 .....
    CASE DEFAULT
      out(i:i) = in(i:i)
3. Build a table such that
   tolowertab(i) = achar(i)
   everywhere, except for 
   tolowertab(ichar('A')) = 'a'
    etc. 

The attached code read a bunch (128K) lines of 80 characters from a
file, then applies in succession the four algorithms to substrings of
the  lines, with growing lengths. The input file is a mixture of Fortran
90, C and TeX source files. 

The results with gfortran 4.3.1 -O3 -march=native on my Core 2 duo say
that the tabular approach wins hands down for long strings, but the
binary search index is also good for short strings. The SELECT is really
bad. 
YMMV. 

Note that this does NOT take into account accented letters, something
that my native language would want, but then, in that case, you're
probably better off with some other tools.... 

 Timings for tolower on       131072  strings.
 Length            Index   Srt. Index       Select          Tab
       1      4.3402E-02   2.2024E-02   7.0476E-02   2.1090E-02
       2      6.6103E-02   2.4197E-02   1.1923E-01   2.2238E-02
       3      8.8907E-02   2.7437E-02   1.6985E-01   2.2252E-02
       4      1.1021E-01   2.8809E-02   2.1861E-01   2.1893E-02
       5      1.3308E-01   3.0973E-02   2.6929E-01   2.2461E-02
       6      1.5539E-01   3.2335E-02   3.1678E-01   2.2976E-02
       7      1.7803E-01   3.4322E-02   3.6537E-01   2.3674E-02
       8      1.9977E-01   4.0478E-02   3.7977E-01   2.3372E-02
       9      2.2328E-01   4.2629E-02   4.1650E-01   2.4409E-02
      10      2.4483E-01   4.6903E-02   4.5040E-01   2.4451E-02
      15      3.5693E-01   5.9450E-02   6.3592E-01   2.6715E-02
      20      4.6643E-01   7.2292E-02   8.3001E-01   2.7864E-02
      25      5.7704E-01   8.5622E-02   1.0197E+00   3.0456E-02
      30      6.8782E-01   9.8541E-02   1.2019E+00   3.2168E-02
      35      7.9851E-01   1.1138E-01   1.3946E+00   3.4454E-02
      40      9.0826E-01   1.2303E-01   1.5804E+00   3.5742E-02
      50      1.1306E+00   1.4602E-01   1.9465E+00   3.9895E-02
      60      1.3469E+00   1.6510E-01   2.2979E+00   4.1917E-02
      70      1.5805E+00   1.9106E-01   2.6358E+00   5.1041E-02
      80      1.8013E+00   2.0884E-01   2.9793E+00   5.4562E-02
 Timings for toupper on       131072  strings.
 Length            Index   Srt. Index       Select          Tab
       1      3.1158E-02   2.2613E-02   5.9888E-02   2.0703E-02
       2      4.7943E-02   2.3659E-02   8.5656E-02   2.1608E-02
       3      5.8065E-02   2.8629E-02   1.1974E-01   2.1970E-02
       4      7.8570E-02   2.5435E-02   1.2231E-01   2.1510E-02
       5      8.8967E-02   2.9107E-02   1.6045E-01   2.3129E-02
       6      1.1268E-01   3.3980E-02   1.8520E-01   2.2927E-02
       7      1.2477E-01   3.7376E-02   2.1915E-01   2.3499E-02
       8      1.4695E-01   4.1472E-02   2.5257E-01   2.5582E-02
       9      1.6963E-01   4.7824E-02   2.8357E-01   2.4178E-02
      10      1.9015E-01   5.3079E-02   3.1403E-01   2.4218E-02
      15      2.8256E-01   7.3696E-02   4.6449E-01   2.6709E-02
      20      3.7452E-01   9.1570E-02   6.1465E-01   2.7942E-02
      25      4.6918E-01   1.1024E-01   7.6694E-01   3.0342E-02
      30      5.6488E-01   1.2770E-01   9.1970E-01   3.2273E-02
      35      6.6174E-01   1.4425E-01   1.0723E+00   3.4518E-02
      40      7.5857E-01   1.5909E-01   1.2220E+00   3.5764E-02
      50      9.5825E-01   1.8787E-01   1.5264E+00   3.9844E-02
      60      1.1596E+00   2.1147E-01   1.8312E+00   4.1755E-02
      70      1.3733E+00   2.3940E-01   2.1473E+00   5.1048E-02
      80      1.5896E+00   2.5867E-01   2.4538E+00   5.4585E-02




Feel free to reuse

Salvatore


   
   
module select_string_mod

  public select_tolower, select_toupper

  interface select_tolower
    module procedure tolowerc
  end interface

  interface select_toupper
    module procedure toupperc
  end interface

  private lcase, ucase, upper1c, lower1c
  character(len=*), parameter   :: lcase='abcdefghijklmnopqrstuvwxyz'
  character(len=*), parameter   :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'

contains 

  function  tolowerc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: tolowerc
    integer  :: i,k

    do i=1,len(string)
      tolowerc(i:i) = lower1c(string(i:i))
    enddo
  end function tolowerc

  function  toupperc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: toupperc
    integer  :: i,k

    do i=1,len(string)
      toupperc(i:i) = upper1c(string(i:i))
    enddo
  end function toupperc


  function  lower1c(ch)
    character(len=1), intent(in) :: ch
    character(len=1)             :: lower1c

    select case(ch) 
    case ('A')
      lower1c = 'a'
    case ('B')
      lower1c = 'b'
    case ('C')
      lower1c = 'c'
    case ('D')
      lower1c = 'd'
    case ('E')
      lower1c = 'e'
    case ('F')
      lower1c = 'f'
    case ('G')
      lower1c = 'g'
    case ('H')
      lower1c = 'h'
    case ('I')
      lower1c = 'i'
    case ('J')
      lower1c = 'j'
    case ('K')
      lower1c = 'k'
    case ('L')
      lower1c = 'l'
    case ('M')
      lower1c = 'm'
    case ('N')
      lower1c = 'n'
    case ('O')
      lower1c = 'o'
    case ('P')
      lower1c = 'p'
    case ('Q')
      lower1c = 'q'
    case ('R')
      lower1c = 'r'
    case ('S')
      lower1c = 's'
    case ('T')
      lower1c = 't'
    case ('U')
      lower1c = 'u'
    case ('V')
      lower1c = 'v'
    case ('W')
      lower1c = 'w'
    case ('X')
      lower1c = 'x'
    case ('Y')
      lower1c = 'y'
    case ('Z')
      lower1c = 'z'
    case default
      lower1c = ch 
    end select
  end function lower1c

  function  upper1c(ch)
    character(len=1), intent(in) :: ch
    character(len=1)             :: upper1c

    select case(ch) 
    case ('a')
      upper1c = 'A'
    case ('b')
      upper1c = 'B'
    case ('c')
      upper1c = 'C'
    case ('d')
      upper1c = 'D'
    case ('e')
      upper1c = 'E'
    case ('f')
      upper1c = 'F'
    case ('g')
      upper1c = 'G'
    case ('h')
      upper1c = 'H'
    case ('i')
      upper1c = 'I'
    case ('j')
      upper1c = 'J'
    case ('k')
      upper1c = 'K'
    case ('l')
      upper1c = 'L'
    case ('m')
      upper1c = 'M'
    case ('n')
      upper1c = 'N'
    case ('o')
      upper1c = 'O'
    case ('p')
      upper1c = 'P'
    case ('q')
      upper1c = 'Q'
    case ('r')
      upper1c = 'R'
    case ('s')
      upper1c = 'S'
    case ('t')
      upper1c = 'T'
    case ('u')
      upper1c = 'U'
    case ('v')
      upper1c = 'V'
    case ('w')
      upper1c = 'W'
    case ('x')
      upper1c = 'X'
    case ('y')
      upper1c = 'Y'
    case ('z')
      upper1c = 'Z'
    case default
      upper1c = ch 
    end select
  end function upper1c


end module select_string_mod


module index_string_mod

  public index_tolower, index_toupper
  interface index_tolower
    module procedure tolowerc
  end interface

  interface index_toupper
    module procedure toupperc
  end interface

  private
  character(len=*), parameter   :: lcase='abcdefghijklmnopqrstuvwxyz'
  character(len=*), parameter   :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'

contains 

  function  tolowerc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: tolowerc
    integer  :: i,k

    do i=1,len(string)
      k = index(ucase,string(i:i))
      if (k /=0 ) then 
        tolowerc(i:i) = lcase(k:k)
      else          
        tolowerc(i:i) = string(i:i)
      end if
    enddo
  end function tolowerc

  function  toupperc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: toupperc
    integer  :: i,k

    do i=1,len(string)
      k = index(lcase,string(i:i))
      if (k /=0 ) then 
        toupperc(i:i) = ucase(k:k)
      else          
        toupperc(i:i) = string(i:i)
      end if
    enddo
  end function toupperc

end module index_string_mod



module srt_indx_string_mod

  public srt_indx_tolower, srt_indx_toupper
  interface srt_indx_tolower
    module procedure tolowerc
  end interface

  interface srt_indx_toupper
    module procedure toupperc
  end interface

  private

  character(len=*), parameter   :: lcase='abcdefghijklmnopqrstuvwxyz'
  character(len=*), parameter   :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'

contains 

  function idx_bsrch(key,v) result(ipos)
    
      implicit none
      integer :: ipos
      character key
      character(len=*)  v
      
      integer lb, ub, m
      
      
      lb = 1 
      ub = len(v)
      ipos = 0 
      
      do 
        if (lb > ub) exit
        m = (lb+ub)/2
        if (key.eq.v(m:m))  then
          ipos = m 
          exit
        else if (key.lt.v(m:m))  then
          ub = m-1
        else 
          lb = m + 1
        end if
      enddo
      return
    end function idx_bsrch



  function  tolowerc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: tolowerc
    integer  :: i,k

    do i=1,len(string)
      k = idx_bsrch(string(i:i),ucase)
      if (k /= 0) then 
        tolowerc(i:i) = lcase(k:k)
      else          
        tolowerc(i:i) = string(i:i)
      end if
    enddo
  end function tolowerc

  function  toupperc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: toupperc
    integer  :: i,k

    do i=1,len(string)
      k = idx_bsrch(string(i:i),lcase)
      if (k /= 0) then 
        toupperc(i:i) = ucase(k:k)
      else          
        toupperc(i:i) = string(i:i)
      end if
    enddo
  end function toupperc

end module srt_indx_string_mod


module tab_string_mod

  public tab_tolower, tab_toupper, tab_string_init
  interface tab_tolower
    module procedure tolowerc
  end interface

  interface tab_toupper
    module procedure toupperc
  end interface

  private

  character(len=*), parameter   :: lcase='abcdefghijklmnopqrstuvwxyz'
  character(len=*), parameter   :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'

  integer, save   :: mxtab=-1
  character(len=1), allocatable, save :: tolower_tab(:), toupper_tab(:)


contains 

  subroutine tab_string_init()
    implicit none 
    integer i,j 
    if (allocated(tolower_tab)) deallocate(tolower_tab)
    if (allocated(toupper_tab)) deallocate(toupper_tab)
    mxtab = 0
    do i=1,len(lcase)
      mxtab = max(mxtab,ichar(lcase(i:i)))
    end do
    do i=1,len(ucase)
      mxtab = max(mxtab,ichar(ucase(i:i)))
    end do
  
    allocate(tolower_tab(0:mxtab), toupper_tab(0:mxtab),stat=i)
    if (i/=0) then
      write(0,*) 'ALlocation error in tab_init'
      stop
    end if
    
    do i=0, mxtab
      tolower_tab(i) = achar(i)
      toupper_tab(i) = achar(i)
    end do
    do i=1,len(lcase)
      j = ichar(lcase(i:i))
      toupper_tab(j) = ucase(i:i)
    end do
    do i=1,len(ucase)
      j = ichar(ucase(i:i))
      tolower_tab(j) = lcase(i:i)
    end do

  end subroutine tab_string_init


  function  tolowerc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: tolowerc
    integer  :: i,k

    if (mxtab < 0) call tab_string_init()
    do i=1,len(string)
      k = ichar(string(i:i))
      if (k <= mxtab) then 
        tolowerc(i:i) = tolower_tab(k)
      else          
        tolowerc(i:i) = string(i:i)
      end if
    enddo
  end function tolowerc

  function  toupperc(string)
    character(len=*), intent(in)  :: string
    character(len=len(string))    :: toupperc
    integer  :: i,k

    if (mxtab < 0) call tab_string_init()
    do i=1,len(string)
      k = ichar(string(i:i))
      if (k <= mxtab) then 
        toupperc(i:i) = toupper_tab(k)
      else          
        toupperc(i:i) = string(i:i)
      end if
    enddo
  end function toupperc

end module tab_string_mod

program try_string
  use tab_string_mod
  use select_string_mod
  use srt_indx_string_mod
  use index_string_mod

  implicit none 

  character(len=*), parameter   :: lcase='abcdefghijklmnopqrstuvwxyz'
  character(len=*), parameter   :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  integer, parameter :: ltb=20
  integer, parameter :: itab(ltb)=(/1,2,3,4,5,6,7,8,9,10,15,20,25,30,35,40,50,60,70,80/)
  integer   :: i,j,k,mxtab
  character :: c
  character(len=80) :: strings(128*1024), dummy
  real(kind(1.d0)) :: t0, t1, t2, t3, etime, ttab, tsel, tidx, tsrt
  external :: etime


  do i=1, size(strings)
    read(*,'(a80)',err=200) strings(i)
  end do
200 continue

  write(*,*) 'Timings for tolower on ' ,size(strings),' strings.'
  write(*,'(a8,3x,4(3x,a10))') 'Length ','Index', 'Srt. Index','Select','Tab'
  do j=1,ltb
    k=itab(j)
    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  tab_tolower(strings(i)(1:k))
    end do
    ttab = etime()-t0

    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  select_tolower(strings(i)(1:k))
    end do
    tsel = etime()-t0


    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  srt_indx_tolower(strings(i)(1:k))
    end do
    tsrt = etime()-t0


    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  index_tolower(strings(i)(1:k))
    end do
    tidx = etime()-t0
    write(*,'(i8,3x,4(3x,es10.4))') k,tidx,tsrt,tsel,ttab
  end do

  write(*,*) 'Timings for toupper on ' ,size(strings),' strings.'
  write(*,'(a8,3x,4(3x,a10))') 'Length ','Index', 'Srt. Index','Select','Tab'
  do j=1,ltb
    k=itab(j)
    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  tab_toupper(strings(i)(1:k))
    end do
    ttab = etime()-t0

    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  select_toupper(strings(i)(1:k))
    end do
    tsel = etime()-t0


    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  srt_indx_toupper(strings(i)(1:k))
    end do
    tsrt = etime()-t0


    t0 = etime()
    do i=1, size(strings)
      dummy(1:k) =  index_toupper(strings(i)(1:k))
    end do
    tidx = etime()-t0
    write(*,'(i8,3x,4(3x,es10.4))') k,tidx,tsrt,tsel,ttab
  end do


end program try_string
#include <sys/time.h>
#include <stdio.h>

double timef_() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
  return(temp);
}
double timef() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
  return(temp);
}

double etime() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
  return(temp);
}

double etime_() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
  return(temp);
}

double etimef() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
  return(temp);
}

double etimef_() 
{
  struct timeval tt;
  struct timezone tz;
  double temp;
  if (gettimeofday(&tt,&tz) != 0) {
    fprintf(stderr,"Fatal error for gettimeofday ??? \n");
    exit(-1);
  }
  temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
  return(temp);
}




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]