fread() using mmaped buffer for IO

Nobin Mathew nobin.mathew@gmail.com
Tue Jun 22 14:55:00 GMT 2010


Hi,

I have a problem where my application generates a lot of minor page
faults. Initially I thought it was a problem with malloc(), but that has
been fixed in the library. I have now traced it to buffered I/O calls:
glibc uses mmap() to create a stream buffer of 4K, i.e. one page. I can
avoid this by providing my own buffer using setvbuf()/setbuf().

I wrote three test programs to check this: read_bufmmap.c, which uses
buffered I/O with the library's mmap()ed buffer; read_bufsetvbuf.c, which
uses buffered I/O with a user-provided buffer; and read_nobuffer.c, which
uses direct (unbuffered) read() calls. When you run them and capture the
system calls with strace, you can see that in the library-mmap()ed buffer
case (read_bufmmap.c) the library mmap()s one page (4096 bytes) to use as
the read() buffer:

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2ade80944000

This call appears after every fopen(), before the first fread() on the
newly opened stream.

In the strace output of read_bufsetvbuf.c, however, this mmap() call
after each fopen() is absent. See the performance table below for the
effect.

10000 loops

[root@mysys testnew]# time ./read_nobuffer       --> direct IO
real    0m0.922s
user    0m0.210s
sys     0m0.711s

[root@mysys testnew]# time ./read_bufmmap     --> buffered IO with library mmap
real    0m0.321s
user    0m0.106s
sys     0m0.215s

[root@mysys testnew]# time ./read_bufsetvbuf    --> buffered IO with
user provided buffer,setvbuf()
real    0m0.178s
user    0m0.071s
sys     0m0.106s

[root@mysys testnew]#

 Minor page faults (see the fault/s column)

[root@mysys ~]# sar -B 1 10000
Linux 2.6.18-120.el5 (mysys)     12/16/2009

06:17:43 PM  pgpgin/s pgpgout/s   fault/s  majflt/s
06:17:44 PM      0.00      0.00     51.00      0.00
06:17:45 PM      0.00      0.00     24.00      0.00
06:17:46 PM      0.00      0.00     12.00      0.00
06:17:47 PM      0.00      0.00     12.00      0.00
06:17:48 PM      0.00     31.68     11.88      0.00
06:17:49 PM      0.00      4.04     12.12      0.00
06:17:50 PM      0.00      0.00     12.00      0.00
06:17:51 PM      0.00      0.00     14.00      0.00
06:17:52 PM      0.00      0.00     12.00      0.00
06:17:53 PM      0.00      0.00    163.00      0.00   ---> direct IO
06:17:54 PM      0.00      0.00     36.00      0.00
06:17:55 PM      0.00      0.00     14.00      0.00
06:17:56 PM      0.00      0.00  10213.00      0.00  ---> buffered IO
with library mmap
06:17:57 PM      0.00      0.00     13.27      0.00
06:17:58 PM      0.00     28.57    217.35      0.00  ---> buffered IO
with user provided buffer,setvbuf()
06:17:59 PM      0.00      0.00     12.24      0.00
06:18:00 PM      0.00      0.00     12.12      0.00
06:18:01 PM      0.00      0.00     12.24      0.00

It is not clear to me why the library uses mmap() to create the buffer.
Maybe it is to make opening a file more reliable (I think mmap() is more
likely to succeed than malloc() when the system is under load), or to
avoid eating up the process heap when a large number of files are open.
Either way, this default behaviour (when setvbuf() is not used) reduces
program performance.

Why is this mmap() approach used? Is there any way, other than
setvbuf()/setbuf() and direct I/O, to avoid the minor page faults?

Correct me if I am wrong anywhere.

I found a post regarding this,
http://sources.redhat.com/ml/libc-alpha/2006-04/msg00080.html, but there
was no further discussion.

Thanks in Advance.

Thanks
Nobin Mathew


Source code for programs I used is pasted below.

[root@mysys testnew]# cat read_nobuffer.c
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>


/*
 * Benchmark: open /root/dmesg.txt 10000 times and drain it each time with
 * unbuffered read() calls, 256 bytes per call.
 *
 * Returns EXIT_SUCCESS when every pass completes, or EXIT_FAILURE if the
 * file cannot be opened or a read fails.
 */
int main(void)
{
       char buffer[256];
       int i = 0;

       while (i++ < 10000)
       {
               /* open() signals failure with -1; 0 is a valid descriptor. */
               int fd = open("/root/dmesg.txt", O_RDONLY);
               if (fd == -1) {
                       perror("open");
                       return EXIT_FAILURE;
               }

               /*
                * read() returns 0 at end of file and -1 on error, so only
                * continue while it actually delivered data; looping on any
                * non-zero result would spin forever on a persistent error.
                */
               ssize_t got;
               while ((got = read(fd, buffer, sizeof buffer)) > 0)
                       ;
               if (got < 0) {
                       perror("read");
                       close(fd);
                       return EXIT_FAILURE;
               }

               close(fd);
       }
       return EXIT_SUCCESS;
}

[root@mysys testnew]# cat read_bufmmap.c
#include <stdio.h>
#include <stdlib.h>


/*
 * Benchmark: open /root/dmesg.txt 10000 times with fopen() and drain it
 * each time with fread(), 256 bytes per call, leaving stream buffering at
 * the library default.
 *
 * Returns EXIT_SUCCESS when every pass completes, or EXIT_FAILURE if the
 * file cannot be opened or a read error occurs.
 */
int main(void)
{
       char buffer[256];
       int i = 0;

       while (i++ < 10000)
       {
               FILE *fp = fopen("/root/dmesg.txt", "r");
               if (fp == NULL) {
                       perror("fopen");
                       return EXIT_FAILURE;
               }

               /*
                * Loop on fread()'s own result instead of feof(): a read
                * error sets the error indicator, not EOF, so a feof()-only
                * loop would never terminate. The final short (or empty)
                * read returns 0 and ends the loop.
                */
               while (fread(buffer, sizeof buffer, 1, fp) == 1)
                       ;
               if (ferror(fp)) {
                       perror("fread");
                       fclose(fp);
                       return EXIT_FAILURE;
               }

               fclose(fp);
       }
       return EXIT_SUCCESS;
}

[root@mysys testnew]# cat read_bufsetvbuf.c
#include <stdio.h>
#include <stdlib.h>

/* Caller-supplied stream buffer; main() hands its first 4096 bytes to setvbuf(). */
char buffer123[8192];

/*
 * Benchmark: open /root/dmesg.txt 10000 times with fopen(), install
 * buffer123 as a fully buffered 4096-byte stream buffer via setvbuf(), and
 * drain the file with fread(), 256 bytes per call.
 *
 * Returns EXIT_SUCCESS when every pass completes, or EXIT_FAILURE if the
 * file cannot be opened, the buffer cannot be installed, or a read error
 * occurs.
 */
int main(void)
{
       char buffer[256];
       int i = 0;

       while (i++ < 10000)
       {
               FILE *fp = fopen("/root/dmesg.txt", "r");
               if (fp == NULL) {
                       perror("fopen");
                       return EXIT_FAILURE;
               }

               /*
                * setvbuf() returns non-zero on failure. Carrying on would
                * silently measure the library's own buffer instead of ours,
                * so treat it as fatal.
                */
               if (setvbuf(fp, buffer123, _IOFBF, 4096) != 0) {
                       fprintf(stderr, "setvbuf failed\n");
                       fclose(fp);
                       return EXIT_FAILURE;
               }

               /*
                * Loop on fread()'s own result instead of feof(): a read
                * error sets the error indicator, not EOF, so a feof()-only
                * loop would never terminate.
                */
               while (fread(buffer, sizeof buffer, 1, fp) == 1)
                       ;
               if (ferror(fp)) {
                       perror("fread");
                       fclose(fp);
                       return EXIT_FAILURE;
               }

               fclose(fp);
       }
       return EXIT_SUCCESS;
}



More information about the Gcc-help mailing list