Pentium friendly ffs code
Jan Hubicka
hubicka@atrey.karlin.mff.cuni.cz
Fri Oct 1 14:14:00 GMT 1999
Hi
This patch adds emitting of bsf replacement code for the Pentium, following the
Intel Optimizing Manual. It makes gcc beat vc in yet another benchmark :)
Fri Oct 1 06:47:45 CEST 1999 Jan Hubicka <hubicka@freesoft.cz>
* i386.md (ffs expander): Emit Pentium friendly code for
TARGET_PENTIUM.
*** i386.mc.old Thu Sep 30 20:33:58 1999
--- i386.md Fri Oct 1 11:19:54 1999
***************
*** 6778,6783 ****
--- 6834,6897 ----
const0_rtx),
tmp,
out)));
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_move_insn (operands[0], out);
+ }
+
+ /* The Pentium bsf instruction is extremely slow. The following code is recommended by
+ the Optimizing Manual as a reasonable replacement:
+ TEST EAX,EAX
+ JZ SHORT BS2
+ XOR ECX,ECX
+ MOV DWORD PTR [TEMP+4],ECX
+ SUB ECX,EAX
+ AND EAX,ECX
+ MOV DWORD PTR [TEMP],EAX
+ FILD QWORD PTR [TEMP]
+ FSTP QWORD PTR [TEMP]
+ WAIT ; WAIT only needed for compatibility with
+ ; earlier processors
+ MOV ECX, DWORD PTR [TEMP+4]
+ SHR ECX,20
+ SUB ECX,3FFH
+ TEST EAX,EAX ; clear zero flag
+ BS2:
+ The following piece of code expands ffs into a similar sequence.
+ */
+
+ else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
+ {
+ rtx label = gen_label_rtx ();
+ rtx jump, lo, hi;
+ rtx mem = assign_386_stack_local (DImode, 0);
+ rtx fptmp = gen_reg_rtx (DFmode);
+ split_di (&mem, 1, &lo, &hi);
+
+ emit_move_insn (out, const0_rtx);
+ emit_insn (gen_cmpsi_0 (in, const0_rtx));
+
+ jump = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ jump = gen_rtx_EQ (VOIDmode, jump, const0_rtx);
+ jump = gen_rtx_IF_THEN_ELSE (VOIDmode, jump,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
+ JUMP_LABEL (jump) = label;
+
+ emit_move_insn (hi, out);
+ emit_insn (gen_subsi3 (out, out, in));
+ emit_insn (gen_andsi3 (out, out, in));
+ emit_move_insn (lo, out);
+ emit_insn (gen_floatdidf2 (fptmp,mem));
+ emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
+ emit_move_insn (out, hi);
+ emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
+ emit_insn (gen_subsi3 (out, out, GEN_INT (0x3fe)));
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operands[0], out);
}
else
{
More information about the Gcc-patches
mailing list