Pentium friendly ffs code

Jan Hubicka hubicka@atrey.karlin.mff.cuni.cz
Sat Oct 2 07:02:00 GMT 1999


> On Fri, Oct 01, 1999 at 11:14:13PM +0200, Jan Hubicka wrote:
> > +       emit_insn (gen_cmpsi_0 (in, const0_rtx));
> > + 
> > +       jump = gen_rtx_REG (CCNOmode, FLAGS_REG);
> > +       jump = gen_rtx_EQ (VOIDmode, jump, const0_rtx);
> > +       jump = gen_rtx_IF_THEN_ELSE (VOIDmode, jump,
> > + 			          gen_rtx_LABEL_REF (VOIDmode, label),
> > + 			          pc_rtx);
> > +       jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
> > +       JUMP_LABEL (jump) = label;
> 
> Use emit_cmp_and_jump_insns.
Cool function. Thanks for the hint!

Here is the updated patch.

Fri Oct 1 06:47:45 CEST 1999  Jan Hubicka  <hubicka@freesoft.cz>
	* i386.md (ffs expander): Emit Pentium friendly code for
	TARGET_PENTIUM.
*** i386.md.noa	Sat Sep 25 21:39:36 1999
--- i386.md	Fri Oct  1 22:04:10 1999
***************
*** 6778,6783 ****
--- 6778,6834 ----
  				const0_rtx),
  		    tmp,
  		    out)));
+       emit_insn (gen_addsi3 (out, out, const1_rtx));
+       emit_move_insn (operands[0], out);
+     }
+ 
+   /* The Pentium bsf instruction is extremely slow.  The following code is
+      recommended by the Intel Optimizing Manual as a reasonable replacement:
+            TEST    EAX,EAX
+ 	   JZ      SHORT BS2
+ 	   XOR     ECX,ECX
+ 	   MOV     DWORD PTR [TEMP+4],ECX
+ 	   SUB     ECX,EAX
+ 	   AND     EAX,ECX
+ 	   MOV     DWORD PTR [TEMP],EAX
+ 	   FILD    QWORD PTR [TEMP]
+ 	   FSTP    QWORD PTR [TEMP]
+ 	   WAIT    ; WAIT only needed for compatibility with
+ 	           ; earlier processors
+ 	   MOV     ECX, DWORD PTR [TEMP+4]
+ 	   SHR     ECX,20
+ 	   SUB     ECX,3FFH
+ 	   TEST    EAX,EAX       ; clear zero flag
+        BS2:
+      The following code expands ffs to a similar sequence; it subtracts 0x3fe
+      rather than 0x3ff because the ffs result is 1-based.  */
+ 
+   else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
+     {
+       rtx label = gen_label_rtx ();
+       rtx lo, hi;
+       rtx mem = assign_386_stack_local (DImode, 0);
+       rtx fptmp = gen_reg_rtx (DFmode);
+       split_di (&mem, 1, &lo, &hi);
+ 
+       emit_move_insn (out, const0_rtx);
+ 
+       emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, 0, label);
+ 
+       emit_move_insn (hi, out);
+       emit_insn (gen_subsi3 (out, out, in));
+       emit_insn (gen_andsi3 (out, out, in));
+       emit_move_insn (lo, out);
+       emit_insn (gen_floatdidf2 (fptmp,mem));
+       emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
+       emit_move_insn (out, hi);
+       emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
+       emit_insn (gen_subsi3 (out, out, GEN_INT (0x3fe)));
+ 
+       emit_label (label);
+       LABEL_NUSES (label) = 1;
+ 
+       emit_move_insn (operands[0], out);
      }
    else
      {
***************
*** 6789,6798 ****
  			      const0_rtx)));
        emit_insn (gen_negsi2 (tmp, tmp));
        emit_insn (gen_iorsi3 (out, out, tmp));
      }
-   emit_insn (gen_addsi3 (out, out, const1_rtx));
- 
-   emit_move_insn (operands[0], out);
    DONE;  
  }")
  
--- 6840,6848 ----
  			      const0_rtx)));
        emit_insn (gen_negsi2 (tmp, tmp));
        emit_insn (gen_iorsi3 (out, out, tmp));
+       emit_insn (gen_addsi3 (out, out, const1_rtx));
+       emit_move_insn (operands[0], out);
      }
    DONE;  
  }")
  



More information about the Gcc-patches mailing list