This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

i386 bitops patterns


Implements the ctz and clz patterns.  Delays expansion of ffs long
enough for the optimizers to propagate constants into it and simplify.

For ffs, I dropped the Pentium float trick for now.  It could go
back in, but it ought to be done in such a way that ctz and clz
can take advantage of it as well.


r~


        * config/i386/i386.md (UNSPEC_BSF): Remove.
        (ffssi2): Split into cmove and no_cmove insns and splitters;
        lose pentium float trick for now.
        (ffssi_1): Add * to name; use CTZ instead of UNSPEC.
        (ctzsi2, clzsi2, *bsr): New.

Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.423
diff -c -p -d -r1.423 i386.md
*** config/i386/i386.md	4 Feb 2003 20:47:46 -0000	1.423
--- config/i386/i386.md	5 Feb 2003 09:37:20 -0000
***************
*** 80,86 ****
     (UNSPEC_SCAS			20)
     (UNSPEC_SIN			21)
     (UNSPEC_COS			22)
-    (UNSPEC_BSF			23)
     (UNSPEC_FNSTSW		24)
     (UNSPEC_SAHF			25)
     (UNSPEC_FSTCW		26)
--- 80,85 ----
***************
*** 14110,14213 ****
    [(set_attr "type" "leave")])
  
  (define_expand "ffssi2"
!   [(set (match_operand:SI 0 "nonimmediate_operand" "") 
! 	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
    ""
! {
!   rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
!   rtx in = operands[1];
! 
!   if (TARGET_CMOVE)
!     {
!       emit_move_insn (tmp, constm1_rtx);
!       emit_insn (gen_ffssi_1 (out, in));
!       emit_insn (gen_rtx_SET (VOIDmode, out,
! 		  gen_rtx_IF_THEN_ELSE (SImode, 
! 		    gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG),
! 				const0_rtx),
! 		    tmp,
! 		    out)));
!       emit_insn (gen_addsi3 (out, out, const1_rtx));
!       emit_move_insn (operands[0], out);
!     }
! 
!   /* Pentium bsf instruction is extremely slow.  The following code is
!      recommended by the Intel Optimizing Manual as a reasonable replacement:
!            TEST    EAX,EAX
! 	   JZ      SHORT BS2
! 	   XOR     ECX,ECX
! 	   MOV     DWORD PTR [TEMP+4],ECX
! 	   SUB     ECX,EAX
! 	   AND     EAX,ECX
! 	   MOV     DWORD PTR [TEMP],EAX
! 	   FILD    QWORD PTR [TEMP]
! 	   FSTP    QWORD PTR [TEMP]
! 	   WAIT    ; WAIT only needed for compatibility with
! 	           ; earlier processors
! 	   MOV     ECX, DWORD PTR [TEMP+4]
! 	   SHR     ECX,20
! 	   SUB     ECX,3FFH
! 	   TEST    EAX,EAX       ; clear zero flag
!        BS2:
!      Following piece of code expand ffs to similar beast.
!        */
! 
!   else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
!     {
!       rtx label = gen_label_rtx ();
!       rtx lo, hi;
!       rtx mem = assign_386_stack_local (DImode, 0);
!       rtx fptmp = gen_reg_rtx (DFmode);
!       split_di (&mem, 1, &lo, &hi);
! 
!       emit_move_insn (out, const0_rtx);
! 
!       emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label);
! 
!       emit_move_insn (hi, out);
!       emit_insn (gen_subsi3 (out, out, in));
!       emit_insn (gen_andsi3 (out, out, in));
!       emit_move_insn (lo, out);
!       emit_insn (gen_floatdidf2 (fptmp,mem));
!       emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
!       emit_move_insn (out, hi);
!       emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
!       emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1)));
  
!       emit_label (label);
!       LABEL_NUSES (label) = 1;
  
!       emit_move_insn (operands[0], out);
!     }
!   else
!     {
!       emit_move_insn (tmp, const0_rtx);
!       emit_insn (gen_ffssi_1 (out, in));
!       emit_insn (gen_rtx_SET (VOIDmode, 
! 		  gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)),
! 		  gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
! 			      const0_rtx)));
!       emit_insn (gen_negsi2 (tmp, tmp));
!       emit_insn (gen_iorsi3 (out, out, tmp));
!       emit_insn (gen_addsi3 (out, out, const1_rtx));
!       emit_move_insn (operands[0], out);
!     }
!   DONE;  
  })
  
! (define_insn "ffssi_1"
    [(set (reg:CCZ 17)
!         (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
  		     (const_int 0)))
     (set (match_operand:SI 0 "register_operand" "=r")
! 	(unspec:SI [(match_dup 1)] UNSPEC_BSF))]
    ""
    "bsf{l}\t{%1, %0|%0, %1}"
    [(set_attr "prefix_0f" "1")
     (set_attr "ppro_uops" "few")])
  
! ;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger
! ;; and slower than the two-byte movzx insn needed to do the work in SImode.
  
  ;; Thread-local storage patterns for ELF.
  ;;
--- 14109,14206 ----
    [(set_attr "type" "leave")])
  
  (define_expand "ffssi2"
!   [(parallel
!      [(set (match_operand:SI 0 "register_operand" "") 
! 	   (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
!       (clobber (match_scratch:SI 2 ""))
!       (clobber (reg:CC 17))])]
    ""
!   "")
  
! (define_insn_and_split "*ffs_cmove"
!   [(set (match_operand:SI 0 "register_operand" "=r") 
! 	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
!    (clobber (match_scratch:SI 2 "=&r"))
!    (clobber (reg:CC 17))]
!   "TARGET_CMOVE"
!   "#"
!   "&& reload_completed"
!   [(set (match_dup 2) (const_int -1))
!    (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
! 	      (set (match_dup 0) (ctz:SI (match_dup 1)))])
!    (set (match_dup 0) (if_then_else:SI
! 			(eq (reg:CCZ 17) (const_int 0))
! 			(match_dup 2)
! 			(match_dup 0)))
!    (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
! 	      (clobber (reg:CC 17))])]
!   "")
  
! (define_insn_and_split "*ffs_no_cmove"
!   [(set (match_operand:SI 0 "nonimmediate_operand" "=r") 
! 	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
!    (clobber (match_scratch:SI 2 "=&r"))
!    (clobber (reg:CC 17))]
!   ""
!   "#"
!   "reload_completed"
!   [(parallel [(set (match_dup 2) (const_int 0))
! 	      (clobber (reg:CC 17))])
!    (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
! 	      (set (match_dup 0) (ctz:SI (match_dup 1)))])
!    (set (strict_low_part (match_dup 3))
! 	(eq:QI (reg:CCZ 17) (const_int 0)))
!    (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
! 	      (clobber (reg:CC 17))])
!    (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
! 	      (clobber (reg:CC 17))])
!    (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
! 	      (clobber (reg:CC 17))])]
! {
!   operands[3] = gen_lowpart (QImode, operands[2]);
  })
  
! (define_insn "*ffssi_1"
    [(set (reg:CCZ 17)
! 	(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
  		     (const_int 0)))
     (set (match_operand:SI 0 "register_operand" "=r")
! 	(ctz:SI (match_dup 1)))]
    ""
    "bsf{l}\t{%1, %0|%0, %1}"
    [(set_attr "prefix_0f" "1")
     (set_attr "ppro_uops" "few")])
  
! (define_insn "ctzsi2"
!   [(set (match_operand:SI 0 "register_operand" "=r")
! 	(ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
!    (clobber (reg:CC 17))]
!   ""
!   "bsf{l}\t{%1, %0|%0, %1}"
!   [(set_attr "prefix_0f" "1")
!    (set_attr "ppro_uops" "few")])
! 
! (define_expand "clzsi2"
!   [(parallel
!      [(set (match_operand:SI 0 "register_operand" "")
! 	   (minus:SI (const_int 31)
! 		     (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
!       (clobber (reg:CC 17))])
!    (parallel
!      [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
!       (clobber (reg:CC 17))])]
!   ""
!   "")
! 
! (define_insn "*bsr"
!   [(set (match_operand:SI 0 "register_operand" "=r")
! 	(minus:SI (const_int 31)
! 		  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
!    (clobber (reg:CC 17))]
!   ""
!   "bsr{l}\t{%1, %0|%0, %1}"
!   [(set_attr "prefix_0f" "1")
!    (set_attr "ppro_uops" "few")])
  
  ;; Thread-local storage patterns for ELF.
  ;;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]