This is the mail archive of the
fortran@gcc.gnu.org
mailing list for the GNU Fortran project.
String case (was: Re: Accepts invalid?)
- From: Salvatore Filippone <salvatore dot filippone at uniroma2 dot it>
- To: Fortran at gcc dot gnu dot org
- Date: Sat, 30 Aug 2008 18:44:09 +0200
- Subject: String case (was: Re: Accepts invalid?)
- Reply-to: salvatore dot filippone at uniroma2 dot it
As for converting case in Fortran, and the example given by Daniel
Franke, there are at least three other approaches:
1. roll your own version of INDEX exploiting the fact that the
lowercase/uppercase strings are sorted;
2. Use a giant SELECT statement
SELECT CASE(in(i:i))
CASE('A')
out(i:i) = 'a'
.....
CASE DEFAULT
out(i:i) = in(i:i)
3. Build a table such that
tolowertab(i) = achar(i)
everywhere, except for
tolowertab(ichar('A')) = 'a'
etc.
The attached code read a bunch (128K) lines of 80 characters from a
file, then applies in succession the four algorithms to substrings of
the lines, with growing lengths. The input file is a mixture of Fortran
90, C and TeX source files.
The results with gfortran 4.3.1 -O3 -march=native on my Core 2 duo say
that the tabular approach wins hands down for long strings, but the
binary search index is also good for short strings. The SELECT is really
bad.
YMMV.
Note that this does NOT take into account accented letters, something
that my native language would want, but then, in that case, you're
probably better off with some other tools....
Timings for tolower on 131072 strings.
Length Index Srt. Index Select Tab
1 4.3402E-02 2.2024E-02 7.0476E-02 2.1090E-02
2 6.6103E-02 2.4197E-02 1.1923E-01 2.2238E-02
3 8.8907E-02 2.7437E-02 1.6985E-01 2.2252E-02
4 1.1021E-01 2.8809E-02 2.1861E-01 2.1893E-02
5 1.3308E-01 3.0973E-02 2.6929E-01 2.2461E-02
6 1.5539E-01 3.2335E-02 3.1678E-01 2.2976E-02
7 1.7803E-01 3.4322E-02 3.6537E-01 2.3674E-02
8 1.9977E-01 4.0478E-02 3.7977E-01 2.3372E-02
9 2.2328E-01 4.2629E-02 4.1650E-01 2.4409E-02
10 2.4483E-01 4.6903E-02 4.5040E-01 2.4451E-02
15 3.5693E-01 5.9450E-02 6.3592E-01 2.6715E-02
20 4.6643E-01 7.2292E-02 8.3001E-01 2.7864E-02
25 5.7704E-01 8.5622E-02 1.0197E+00 3.0456E-02
30 6.8782E-01 9.8541E-02 1.2019E+00 3.2168E-02
35 7.9851E-01 1.1138E-01 1.3946E+00 3.4454E-02
40 9.0826E-01 1.2303E-01 1.5804E+00 3.5742E-02
50 1.1306E+00 1.4602E-01 1.9465E+00 3.9895E-02
60 1.3469E+00 1.6510E-01 2.2979E+00 4.1917E-02
70 1.5805E+00 1.9106E-01 2.6358E+00 5.1041E-02
80 1.8013E+00 2.0884E-01 2.9793E+00 5.4562E-02
Timings for toupper on 131072 strings.
Length Index Srt. Index Select Tab
1 3.1158E-02 2.2613E-02 5.9888E-02 2.0703E-02
2 4.7943E-02 2.3659E-02 8.5656E-02 2.1608E-02
3 5.8065E-02 2.8629E-02 1.1974E-01 2.1970E-02
4 7.8570E-02 2.5435E-02 1.2231E-01 2.1510E-02
5 8.8967E-02 2.9107E-02 1.6045E-01 2.3129E-02
6 1.1268E-01 3.3980E-02 1.8520E-01 2.2927E-02
7 1.2477E-01 3.7376E-02 2.1915E-01 2.3499E-02
8 1.4695E-01 4.1472E-02 2.5257E-01 2.5582E-02
9 1.6963E-01 4.7824E-02 2.8357E-01 2.4178E-02
10 1.9015E-01 5.3079E-02 3.1403E-01 2.4218E-02
15 2.8256E-01 7.3696E-02 4.6449E-01 2.6709E-02
20 3.7452E-01 9.1570E-02 6.1465E-01 2.7942E-02
25 4.6918E-01 1.1024E-01 7.6694E-01 3.0342E-02
30 5.6488E-01 1.2770E-01 9.1970E-01 3.2273E-02
35 6.6174E-01 1.4425E-01 1.0723E+00 3.4518E-02
40 7.5857E-01 1.5909E-01 1.2220E+00 3.5764E-02
50 9.5825E-01 1.8787E-01 1.5264E+00 3.9844E-02
60 1.1596E+00 2.1147E-01 1.8312E+00 4.1755E-02
70 1.3733E+00 2.3940E-01 2.1473E+00 5.1048E-02
80 1.5896E+00 2.5867E-01 2.4538E+00 5.4585E-02
Feel free to reuse
Salvatore
module select_string_mod
public select_tolower, select_toupper
interface select_tolower
module procedure tolowerc
end interface
interface select_toupper
module procedure toupperc
end interface
private lcase, ucase, upper1c, lower1c
character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz'
character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
contains
function tolowerc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: tolowerc
integer :: i,k
do i=1,len(string)
tolowerc(i:i) = lower1c(string(i:i))
enddo
end function tolowerc
function toupperc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: toupperc
integer :: i,k
do i=1,len(string)
toupperc(i:i) = upper1c(string(i:i))
enddo
end function toupperc
function lower1c(ch)
character(len=1), intent(in) :: ch
character(len=1) :: lower1c
select case(ch)
case ('A')
lower1c = 'a'
case ('B')
lower1c = 'b'
case ('C')
lower1c = 'c'
case ('D')
lower1c = 'd'
case ('E')
lower1c = 'e'
case ('F')
lower1c = 'f'
case ('G')
lower1c = 'g'
case ('H')
lower1c = 'h'
case ('I')
lower1c = 'i'
case ('J')
lower1c = 'j'
case ('K')
lower1c = 'k'
case ('L')
lower1c = 'l'
case ('M')
lower1c = 'm'
case ('N')
lower1c = 'n'
case ('O')
lower1c = 'o'
case ('P')
lower1c = 'p'
case ('Q')
lower1c = 'q'
case ('R')
lower1c = 'r'
case ('S')
lower1c = 's'
case ('T')
lower1c = 't'
case ('U')
lower1c = 'u'
case ('V')
lower1c = 'v'
case ('W')
lower1c = 'w'
case ('X')
lower1c = 'x'
case ('Y')
lower1c = 'y'
case ('Z')
lower1c = 'z'
case default
lower1c = ch
end select
end function lower1c
function upper1c(ch)
character(len=1), intent(in) :: ch
character(len=1) :: upper1c
select case(ch)
case ('a')
upper1c = 'A'
case ('b')
upper1c = 'B'
case ('c')
upper1c = 'C'
case ('d')
upper1c = 'D'
case ('e')
upper1c = 'E'
case ('f')
upper1c = 'F'
case ('g')
upper1c = 'G'
case ('h')
upper1c = 'H'
case ('i')
upper1c = 'I'
case ('j')
upper1c = 'J'
case ('k')
upper1c = 'K'
case ('l')
upper1c = 'L'
case ('m')
upper1c = 'M'
case ('n')
upper1c = 'N'
case ('o')
upper1c = 'O'
case ('p')
upper1c = 'P'
case ('q')
upper1c = 'Q'
case ('r')
upper1c = 'R'
case ('s')
upper1c = 'S'
case ('t')
upper1c = 'T'
case ('u')
upper1c = 'U'
case ('v')
upper1c = 'V'
case ('w')
upper1c = 'W'
case ('x')
upper1c = 'X'
case ('y')
upper1c = 'Y'
case ('z')
upper1c = 'Z'
case default
upper1c = ch
end select
end function upper1c
end module select_string_mod
module index_string_mod
public index_tolower, index_toupper
interface index_tolower
module procedure tolowerc
end interface
interface index_toupper
module procedure toupperc
end interface
private
character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz'
character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
contains
function tolowerc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: tolowerc
integer :: i,k
do i=1,len(string)
k = index(ucase,string(i:i))
if (k /=0 ) then
tolowerc(i:i) = lcase(k:k)
else
tolowerc(i:i) = string(i:i)
end if
enddo
end function tolowerc
function toupperc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: toupperc
integer :: i,k
do i=1,len(string)
k = index(lcase,string(i:i))
if (k /=0 ) then
toupperc(i:i) = ucase(k:k)
else
toupperc(i:i) = string(i:i)
end if
enddo
end function toupperc
end module index_string_mod
module srt_indx_string_mod
public srt_indx_tolower, srt_indx_toupper
interface srt_indx_tolower
module procedure tolowerc
end interface
interface srt_indx_toupper
module procedure toupperc
end interface
private
character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz'
character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
contains
function idx_bsrch(key,v) result(ipos)
implicit none
integer :: ipos
character key
character(len=*) v
integer lb, ub, m
lb = 1
ub = len(v)
ipos = 0
do
if (lb > ub) exit
m = (lb+ub)/2
if (key.eq.v(m:m)) then
ipos = m
exit
else if (key.lt.v(m:m)) then
ub = m-1
else
lb = m + 1
end if
enddo
return
end function idx_bsrch
function tolowerc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: tolowerc
integer :: i,k
do i=1,len(string)
k = idx_bsrch(string(i:i),ucase)
if (k /= 0) then
tolowerc(i:i) = lcase(k:k)
else
tolowerc(i:i) = string(i:i)
end if
enddo
end function tolowerc
function toupperc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: toupperc
integer :: i,k
do i=1,len(string)
k = idx_bsrch(string(i:i),lcase)
if (k /= 0) then
toupperc(i:i) = ucase(k:k)
else
toupperc(i:i) = string(i:i)
end if
enddo
end function toupperc
end module srt_indx_string_mod
module tab_string_mod
public tab_tolower, tab_toupper, tab_string_init
interface tab_tolower
module procedure tolowerc
end interface
interface tab_toupper
module procedure toupperc
end interface
private
character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz'
character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
integer, save :: mxtab=-1
character(len=1), allocatable, save :: tolower_tab(:), toupper_tab(:)
contains
subroutine tab_string_init()
implicit none
integer i,j
if (allocated(tolower_tab)) deallocate(tolower_tab)
if (allocated(toupper_tab)) deallocate(toupper_tab)
mxtab = 0
do i=1,len(lcase)
mxtab = max(mxtab,ichar(lcase(i:i)))
end do
do i=1,len(ucase)
mxtab = max(mxtab,ichar(ucase(i:i)))
end do
allocate(tolower_tab(0:mxtab), toupper_tab(0:mxtab),stat=i)
if (i/=0) then
write(0,*) 'ALlocation error in tab_init'
stop
end if
do i=0, mxtab
tolower_tab(i) = achar(i)
toupper_tab(i) = achar(i)
end do
do i=1,len(lcase)
j = ichar(lcase(i:i))
toupper_tab(j) = ucase(i:i)
end do
do i=1,len(ucase)
j = ichar(ucase(i:i))
tolower_tab(j) = lcase(i:i)
end do
end subroutine tab_string_init
function tolowerc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: tolowerc
integer :: i,k
if (mxtab < 0) call tab_string_init()
do i=1,len(string)
k = ichar(string(i:i))
if (k <= mxtab) then
tolowerc(i:i) = tolower_tab(k)
else
tolowerc(i:i) = string(i:i)
end if
enddo
end function tolowerc
function toupperc(string)
character(len=*), intent(in) :: string
character(len=len(string)) :: toupperc
integer :: i,k
if (mxtab < 0) call tab_string_init()
do i=1,len(string)
k = ichar(string(i:i))
if (k <= mxtab) then
toupperc(i:i) = toupper_tab(k)
else
toupperc(i:i) = string(i:i)
end if
enddo
end function toupperc
end module tab_string_mod
program try_string
use tab_string_mod
use select_string_mod
use srt_indx_string_mod
use index_string_mod
implicit none
character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz'
character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
integer, parameter :: ltb=20
integer, parameter :: itab(ltb)=(/1,2,3,4,5,6,7,8,9,10,15,20,25,30,35,40,50,60,70,80/)
integer :: i,j,k,mxtab
character :: c
character(len=80) :: strings(128*1024), dummy
real(kind(1.d0)) :: t0, t1, t2, t3, etime, ttab, tsel, tidx, tsrt
external :: etime
do i=1, size(strings)
read(*,'(a80)',err=200) strings(i)
end do
200 continue
write(*,*) 'Timings for tolower on ' ,size(strings),' strings.'
write(*,'(a8,3x,4(3x,a10))') 'Length ','Index', 'Srt. Index','Select','Tab'
do j=1,ltb
k=itab(j)
t0 = etime()
do i=1, size(strings)
dummy(1:k) = tab_tolower(strings(i)(1:k))
end do
ttab = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = select_tolower(strings(i)(1:k))
end do
tsel = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = srt_indx_tolower(strings(i)(1:k))
end do
tsrt = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = index_tolower(strings(i)(1:k))
end do
tidx = etime()-t0
write(*,'(i8,3x,4(3x,es10.4))') k,tidx,tsrt,tsel,ttab
end do
write(*,*) 'Timings for toupper on ' ,size(strings),' strings.'
write(*,'(a8,3x,4(3x,a10))') 'Length ','Index', 'Srt. Index','Select','Tab'
do j=1,ltb
k=itab(j)
t0 = etime()
do i=1, size(strings)
dummy(1:k) = tab_toupper(strings(i)(1:k))
end do
ttab = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = select_toupper(strings(i)(1:k))
end do
tsel = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = srt_indx_toupper(strings(i)(1:k))
end do
tsrt = etime()-t0
t0 = etime()
do i=1, size(strings)
dummy(1:k) = index_toupper(strings(i)(1:k))
end do
tidx = etime()-t0
write(*,'(i8,3x,4(3x,es10.4))') k,tidx,tsrt,tsel,ttab
end do
end program try_string
#include <sys/time.h>
#include <stdio.h>
double timef_()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
return(temp);
}
double timef()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
return(temp);
}
double etime()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
return(temp);
}
double etime_()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
return(temp);
}
double etimef()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
return(temp);
}
double etimef_()
{
struct timeval tt;
struct timezone tz;
double temp;
if (gettimeofday(&tt,&tz) != 0) {
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
exit(-1);
}
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
return(temp);
}