This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386]: Do not emit "cld" instructions


> On 12/5/06, Jan Hubicka <hubicka@ucw.cz> wrote:
> 
> >This is pretty high.  Would it be possible for you to rerun the
> >test_stringops script on a P4 machine after removing the CLD?  If it
> >really is 48 cycles, it should show a difference in the preferred memcpy
> >codegen.
> 
> Yes, I got different results (attached) on a 3.2 GHz P4. This was
> measured on an otherwise idle machine, and it is somewhat faster than my
> previous results.
> 
> I suspect that the timings are faster due to the fact that this is an HT
> machine and some other compute-intensive task was running in parallel
> during the benchmark. But in any case, the relative differences should be
> the same, but they are not.

Thanks,
it indeed makes rep;movsl quite a bit faster for small blocks!
Basically we should now use rep;movsl on those machines almost always for
memcpy; for memset, rep;stosl wins from 48 bytes up.
I am testing the attached patch, which I would like to commit as obvious
if it passes, once CLD autogen is gone.  I wonder how it changes the povray
benchmarks?

Honza

Index: i386.c
===================================================================
*** i386.c	(revision 119579)
--- i386.c	(working copy)
*************** struct processor_costs pentium4_cost = {
*** 655,663 ****
    COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
    COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
    COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
!   {{libcall, {{12, loop_1_byte}, {64, loop}, {-1, rep_prefix_4_byte}}},
     DUMMY_STRINGOP_ALGS},
!   {{libcall, {{6, loop_1_byte}, {64, loop}, {20480, rep_prefix_4_byte},
     {-1, libcall}}},
     DUMMY_STRINGOP_ALGS},
  };
--- 655,663 ----
    COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
    COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
    COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
!   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
     DUMMY_STRINGOP_ALGS},
!   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
     {-1, libcall}}},
     DUMMY_STRINGOP_ALGS},
  };
*************** struct processor_costs nocona_cost = {
*** 713,722 ****
    COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
    COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
    COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
!   {{libcall, {{12, loop_1_byte}, {64, loop}, {-1, rep_prefix_4_byte}}},
     {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
  	      {100000, unrolled_loop}, {-1, libcall}}}},
!   {{libcall, {{6, loop_1_byte}, {64, loop}, {20480, rep_prefix_4_byte},
     {-1, libcall}}},
     {libcall, {{24, loop}, {64, unrolled_loop},
  	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
--- 713,722 ----
    COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
    COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
    COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
!   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
     {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
  	      {100000, unrolled_loop}, {-1, libcall}}}},
!   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
     {-1, libcall}}},
     {libcall, {{24, loop}, {64, unrolled_loop},
  	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]