[PATCH] Re: Rewrite i386 string operation expansion

Jan Hubicka jh@suse.cz
Thu Nov 30 09:32:00 GMT 2006


> Uros,
> It would probably be a good idea to fill in the pentium4 code generation
> descriptor with correct values, since the code is probably just picking
> up the wrong alternatives.  If you really do have a pentium4 core (and not
> nocona or prescott), could you please run the attached benchmark with
> ./test_stringop 32 640000000 ./xgcc -B ./ -march=pentium4
> With a properly filled descriptor, GCC ought to do best without
> -minline-all-stringops (or with -minline-stringops-dynamically) on the
> benchmark.  Is povray memcpy/memset bound? Where can I find the sources?
Hi,
I forgot the script; I should probably commit it to contrib.

Honza
-------------- next part --------------

# test COMPILER SIZE FLAGS [LABEL]
#
# Compile the memcpy/memset micro-benchmark below with the given compiler
# and flags, run the result once under /usr/bin/time and report the
# elapsed time.
#
#   $1  compiler command line; expanded unquoted on purpose so that it can
#       carry its own arguments (e.g. "./xgcc -B ./ -m32")
#   $2  value passed as -DAVG_SIZE
#   $3  extra compiler flags for this variant (also expanded unquoted)
#   $4  optional label stored next to the time in /tmp/accum
#
# Globals read: STRINGOP (extra compiler flags; the C source checks the
# test_memset macro) and memsize (value passed as -DMEMORY_COPIES).
#
# Output: prints " <time>" without a trailing newline and appends
# "<time> <label>" to /tmp/accum.  If the compiler does not leave an
# executable a.out behind, a "failed" placeholder is printed instead,
# nothing is appended to /tmp/accum and the function returns 1, so a
# variant that failed to build can never be recorded as a timing.
test() {
  # Remove any stale binary so that its absence afterwards reliably means
  # the compilation failed.
  rm -f a.out
  # The C source is fed to the compiler on stdin ("-" with "-x c").
  # shellcheck disable=SC2086 # $1, $3 and $STRINGOP are word lists
  $1 -x c -O3 $3 "-DAVG_SIZE=$2" $STRINGOP "-DMEMORY_COPIES=$memsize" - <<'END'
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
char t[BUFFER_SIZE];
main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
  if [ ! -x ./a.out ]; then
    # Keep the column in the table but do not pollute /tmp/accum.
    printf ' %7s' failed
    return 1
  fi
  # /usr/bin/time writes its report to stderr, hence the 2>&1.
  elapsed=$(/usr/bin/time -f "%E" ./a.out 2>&1)
  # $elapsed is left unquoted on purpose: should time emit extra diagnostic
  # lines, word splitting folds them into a single line so that /tmp/accum
  # keeps exactly one record per run.
  # shellcheck disable=SC2086
  printf ' %s' $elapsed
  # shellcheck disable=SC2086
  echo $elapsed $4 >>/tmp/accum
}

# testrow MODE COMPILER SIZE
#
# Print one row of the result table: the block size, one timing per
# -mstringop-strategy variant, the profile-feedback and
# -minline-stringops-dynamically timings, and finally the best record.
#
#   $1  not used in this function; the 32/64 decision is taken from the
#       global $mode
#   $2  compiler command line, handed to test
#   $3  block size, handed to test
#
# /tmp/accum is truncated first and then filled by test.  "best" is the
# first line of the sorted file, taken before the profile/dynamic runs, so
# only the explicitly selected strategies compete for it.
testrow() {
  : >/tmp/accum
  printf "block size %7i" "$3"
  test "$2" "$3" "-mstringop-strategy=libcall" libcall
  test "$2" "$3" "-mstringop-strategy=rep_byte" rep1
  test "$2" "$3" "-mstringop-strategy=rep_byte -mno-align-stringops" rep1noalign
  test "$2" "$3" "-mstringop-strategy=rep_4byte" rep4
  test "$2" "$3" "-mstringop-strategy=rep_4byte -mno-align-stringops" rep4noalign
  # "=" rather than "==": the latter is not understood by every sh's "[",
  # which would silently drop the rep8 columns.
  if [ "$mode" = 64 ]; then
    test "$2" "$3" "-mstringop-strategy=rep_8byte" rep8
    test "$2" "$3" "-mstringop-strategy=rep_8byte -mno-align-stringops" rep8noalign
  fi
  test "$2" "$3" "-mstringop-strategy=loop" loop
  test "$2" "$3" "-mstringop-strategy=loop -mno-align-stringops" loopnoalign
  test "$2" "$3" "-mstringop-strategy=unrolled_loop" unrl
  test "$2" "$3" "-mstringop-strategy=unrolled_loop -mno-align-stringops" unrlnoalign
  test "$2" "$3" "-mstringop-strategy=byte_loop" byte
  best=$(sort /tmp/accum | head -n 1)
  # The -fprofile-generate run is executed only for its side effect before
  # the -fprofile-use run; its own output is discarded.
  test "$2" "$3" " -fprofile-generate" >/dev/null 2>&1
  test "$2" "$3" " -fprofile-use"
  test "$2" "$3" " -minline-stringops-dynamically"
  echo " best: $best"
}

# test_all_sizes MODE COMPILER
#
# Print the table header and then one testrow line per block size, from
# the largest size down to 1.
#
#   $1  passed through to testrow as its first argument
#   $2  compiler command line, passed through to testrow
#
# The header layout is chosen from the global $mode: the 64-bit table has
# two additional rep8 columns.
test_all_sizes() {
  # "=" rather than "==": the latter is not understood by every sh's "[",
  # which would select the 32-bit header even in 64-bit mode.
  if [ "$mode" = 64 ]; then
    echo "                   libcall   rep1   noalg    rep4   noalg    rep8   noalg    loop   noalg    unrl   noalg    byte profiled dynamic"
  else
    echo "                   libcall   rep1   noalg    rep4   noalg    loop   noalg    unrl   noalg    byte profiled dynamic"
  fi
  # Alternative size lists kept for reference:
  #for size in 1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
  #for size in 8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
  #for size in 128 256 1024 4096 8192 81920 819200
  for size in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 4 1; do
    testrow "$1" "$2" "$size"
  done
}

# Script entry point.
#
# Command line: test_stringop MODE SIZE COMPILER [COMPILER-ARGS...]
#   MODE  32 or 64; appended to the compiler command line as -m<MODE>
#   SIZE  stored in memsize, which test passes on as -DMEMORY_COPIES
#   rest  compiler command line
#
# The arguments are validated before anything is shifted, the usage text
# goes to stderr, and a usage error yields a non-zero exit status.
if [ "$#" -lt 3 ] || { [ "$1" != 32 ] && [ "$1" != 64 ]; }; then
  {
    echo "Usage:"
    echo "test_stringop mode size cmdline"
    echo "mode is either 32 or 64"
    echo "size is amount of memory copied in each test.  Should be chosed small enough so runtime is less than minute for each test and sorting works"
    echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3"
  } >&2
  exit 1
fi
mode=$1
shift
memsize=$1
export memsize
shift
# The remaining words form the compiler command line; it is kept as one
# string and deliberately word-split again where test expands it.
cmdline=$*

# First pass: memcpy (no extra define).
echo "memcpy mode:$mode size:$memsize"
STRINGOP=""
export STRINGOP
test_all_sizes "$mode" "$cmdline -m$mode"

# Second pass: memset, selected in the C source through test_memset.
echo "memset"
STRINGOP="-Dtest_memset=1"
export STRINGOP
test_all_sizes "$mode" "$cmdline -m$mode"


More information about the Gcc-patches mailing list