This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs


> Try the following one. 1) -minline-all-stringops
> -mstringop-strategy=rep_8byte -O2 vs 1) -mstringop_strategy=libcall
> -O2.
> 
> David
> 
> 
> #include <string.h>
> #include <stdio.h>
> #include <stdlib.h>
> #ifndef LEN
> #define LEN 16
> #endif
> 
> void copy(char* s1, char* s2,int len) __attribute__((noinline));
> void copy(char* s1, char* s2,int len)
> {
>    memcpy(s2,s1,len);
> }

I guess the catch here is that you force the copy to be noinline and thus you
eliminate the benefits of inlined sequence.  With inline stringop one saves
regalloc and often can get rid of the alignment tests.

This is script I use to tune the tables.

Honza

test()
{
rm -f a.out
cat <<END | $1 -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize -
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
TIME=`/usr/bin/time -f "%E" ./a.out 2>&1`
echo -n " "$TIME
echo $TIME $4 >>/tmp/accum
}

testrow()
{
echo -n "" >/tmp/accum
printf "block size %7i" $3
test "$2" "$3" "-mstringop-strategy=libcall" libcall
test "$2" "$3" "-mstringop-strategy=rep_byte -malign-stringops" rep1
test "$2" "$3" "-mstringop-strategy=rep_byte -mno-align-stringops" rep1noalign
test "$2" "$3" "-mstringop-strategy=rep_4byte -malign-stringops" rep4
test "$2" "$3" "-mstringop-strategy=rep_4byte -mno-align-stringops" rep4noalign
if [ "$mode" == 64 ]
then
test "$2" "$3" "-mstringop-strategy=rep_8byte -malign-stringops" rep8
test "$2" "$3" "-mstringop-strategy=rep_8byte -mno-align-stringops" rep8noalign
fi
test "$2" "$3" "-mstringop-strategy=loop -malign-stringops"  loop
test "$2" "$3" "-mstringop-strategy=loop -mno-align-stringops"  loopnoalign
test "$2" "$3" "-mstringop-strategy=unrolled_loop -malign-stringops" unrl
test "$2" "$3" "-mstringop-strategy=unrolled_loop -mno-align-stringops" unrlnoalign
test "$2" "$3" "-mstringop-strategy=sse_loop -malign-stringops" sse
test "$2" "$3" "-mstringop-strategy=sse_loop -mno-align-stringops -msse2" ssenoalign
test "$2" "$3" "-mstringop-strategy=byte_loop" byte
best=`cat /tmp/accum | sort | head -1`
test "$2" "$3" " -fprofile-generate" >/dev/null 2>&1
test "$2" "$3" " -fprofile-use"
test "$2" "$3" " -minline-stringops-dynamically"
echo " best: $best"
}

test_all_sizes()
{
if [ "$mode" == 64 ]
then
echo "                   libcall   rep1   noalg    rep4   noalg    rep8   noalg    loop   noalg    unrl   noalg    sse   noalg    byte profiled dynamic"
else
echo "                   libcall   rep1   noalg    rep4   noalg    loop   noalg    unrl   noalg    sse    noalg    byte profiled dynamic"
fi
#for size in 1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
#for size in 8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
for size in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 4 1
#for size in 128 256 1024 4096 8192 81920 819200
do
testrow "$1" "$2" $size
done
}

mode=$1
shift
export memsize=$1
shift
cmdline=$*
if [ "$mode" != 32 ]
then
  if [ "$mode" != 64 ]
  then
    echo "Usage:"
    echo "test_stringop mode size cmdline"
    echo "mode is either 32 or 64"
    echo "size is amount of memory copied in each test.  Should be chosed small enough so runtime is less than minute for each test and sorting works"
    echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3"
    exit
  fi
fi
echo "memcpy mode:$mode size:$memsize"
export STRINGOP=""
type=char
test_all_sizes $mode "$cmdline -m$mode"
echo "Aligned"
type=long
test_all_sizes $mode "$cmdline -m$mode"
echo "memset"
type=char
export STRINGOP="-Dtest_memset=1"
test_all_sizes $mode "$cmdline -m$mode"
echo "Aligned"
type=long
test_all_sizes $mode "$cmdline -m$mode"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]