This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, i386]: Do not emit "cld" instructions
> On 12/5/06, Jan Hubicka <hubicka@ucw.cz> wrote:
>
> >This is pretty high. Would it be possible for you to rerun the
> >test_stringops script on a P4 machine after removing the CLD? If it
> >really is 48 cycles, it should show a difference in the preferred memcpy
> >codegen.
>
> Yes, I got different results (attached) on p4 3.2 GHz. This was
> measured on otherwise idle machine, and it is somehow faster than my
> previous results.
>
> I suspect that timings are faster due to the fact that this is an HT
> machine and some other compute-intensive task was running in parallel
> during the benchmark. But in any case, relative differences should be
> the same, but they are not.
Thanks,
it indeed makes rep;movsl quite a bit faster for small blocks!
Basically, we should now use rep;movsl on those machines almost always
for memcpy; for memset, rep;stosl wins from 48 bytes up.
I am testing the attached patch, which I would like to commit as obvious
if it passes once the CLD autogen is gone. I wonder how it changes the
povray benchmarks?
Honza
Index: i386.c
===================================================================
*** i386.c (revision 119579)
--- i386.c (working copy)
*************** struct processor_costs pentium4_cost = {
*** 655,663 ****
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
! {{libcall, {{12, loop_1_byte}, {64, loop}, {-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
! {{libcall, {{6, loop_1_byte}, {64, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
DUMMY_STRINGOP_ALGS},
};
--- 655,663 ----
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
! {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
! {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
DUMMY_STRINGOP_ALGS},
};
*************** struct processor_costs nocona_cost = {
*** 713,722 ****
COSTS_N_INSNS (3), /* cost of FABS instruction. */
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
! {{libcall, {{12, loop_1_byte}, {64, loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{32, loop}, {20000, rep_prefix_8_byte},
{100000, unrolled_loop}, {-1, libcall}}}},
! {{libcall, {{6, loop_1_byte}, {64, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
{libcall, {{24, loop}, {64, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}}
--- 713,722 ----
COSTS_N_INSNS (3), /* cost of FABS instruction. */
COSTS_N_INSNS (3), /* cost of FCHS instruction. */
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
! {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
{libcall, {{32, loop}, {20000, rep_prefix_8_byte},
{100000, unrolled_loop}, {-1, libcall}}}},
! {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
{libcall, {{24, loop}, {64, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}}