This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
i386 bitops patterns
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 5 Feb 2003 01:45:28 -0800
- Subject: i386 bitops patterns
Implements ctz and clz patterns. Delays expansion of ffs long
enough for the optimizers to propagate constants in and simplify.
For ffs, I dropped the Pentium float trick for now. It
could go back in, but it ought to be done in such a way that ctz
and clz could take advantage of it as well.
r~
* config/i386/i386.md (UNSPEC_BSF): Remove.
(ffssi2): Split into cmove and no_cmove insns and splitters;
lose pentium float trick for now.
(ffssi_1): Add * to name; use CTZ instead of UNSPEC.
(ctzsi2, clzsi2, *bsr): New.
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.423
diff -c -p -d -r1.423 i386.md
*** config/i386/i386.md 4 Feb 2003 20:47:46 -0000 1.423
--- config/i386/i386.md 5 Feb 2003 09:37:20 -0000
***************
*** 80,86 ****
(UNSPEC_SCAS 20)
(UNSPEC_SIN 21)
(UNSPEC_COS 22)
- (UNSPEC_BSF 23)
(UNSPEC_FNSTSW 24)
(UNSPEC_SAHF 25)
(UNSPEC_FSTCW 26)
--- 80,85 ----
***************
*** 14110,14213 ****
[(set_attr "type" "leave")])
(define_expand "ffssi2"
! [(set (match_operand:SI 0 "nonimmediate_operand" "")
! (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
""
! {
! rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
! rtx in = operands[1];
!
! if (TARGET_CMOVE)
! {
! emit_move_insn (tmp, constm1_rtx);
! emit_insn (gen_ffssi_1 (out, in));
! emit_insn (gen_rtx_SET (VOIDmode, out,
! gen_rtx_IF_THEN_ELSE (SImode,
! gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG),
! const0_rtx),
! tmp,
! out)));
! emit_insn (gen_addsi3 (out, out, const1_rtx));
! emit_move_insn (operands[0], out);
! }
!
! /* Pentium bsf instruction is extremely slow. The following code is
! recommended by the Intel Optimizing Manual as a reasonable replacement:
! TEST EAX,EAX
! JZ SHORT BS2
! XOR ECX,ECX
! MOV DWORD PTR [TEMP+4],ECX
! SUB ECX,EAX
! AND EAX,ECX
! MOV DWORD PTR [TEMP],EAX
! FILD QWORD PTR [TEMP]
! FSTP QWORD PTR [TEMP]
! WAIT ; WAIT only needed for compatibility with
! ; earlier processors
! MOV ECX, DWORD PTR [TEMP+4]
! SHR ECX,20
! SUB ECX,3FFH
! TEST EAX,EAX ; clear zero flag
! BS2:
! Following piece of code expand ffs to similar beast.
! */
!
! else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
! {
! rtx label = gen_label_rtx ();
! rtx lo, hi;
! rtx mem = assign_386_stack_local (DImode, 0);
! rtx fptmp = gen_reg_rtx (DFmode);
! split_di (&mem, 1, &lo, &hi);
!
! emit_move_insn (out, const0_rtx);
!
! emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label);
!
! emit_move_insn (hi, out);
! emit_insn (gen_subsi3 (out, out, in));
! emit_insn (gen_andsi3 (out, out, in));
! emit_move_insn (lo, out);
! emit_insn (gen_floatdidf2 (fptmp,mem));
! emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
! emit_move_insn (out, hi);
! emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
! emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! emit_move_insn (operands[0], out);
! }
! else
! {
! emit_move_insn (tmp, const0_rtx);
! emit_insn (gen_ffssi_1 (out, in));
! emit_insn (gen_rtx_SET (VOIDmode,
! gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)),
! gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
! const0_rtx)));
! emit_insn (gen_negsi2 (tmp, tmp));
! emit_insn (gen_iorsi3 (out, out, tmp));
! emit_insn (gen_addsi3 (out, out, const1_rtx));
! emit_move_insn (operands[0], out);
! }
! DONE;
})
! (define_insn "ffssi_1"
[(set (reg:CCZ 17)
! (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r")
! (unspec:SI [(match_dup 1)] UNSPEC_BSF))]
""
"bsf{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1")
(set_attr "ppro_uops" "few")])
! ;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger
! ;; and slower than the two-byte movzx insn needed to do the work in SImode.
;; Thread-local storage patterns for ELF.
;;
--- 14109,14206 ----
[(set_attr "type" "leave")])
(define_expand "ffssi2"
! ;; The pattern is one parallel: operand 0 = ffs (operand 1), plus clobbers
! ;; of an SImode scratch and of reg 17 (the condition-code register).  The
! ;; preparation statement is empty, so no instruction sequence is built at
! ;; expand time; *ffs_cmove and *ffs_no_cmove match this RTL and split it
! ;; once reload has completed.
! [(parallel
! [(set (match_operand:SI 0 "register_operand" "")
! (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
! (clobber (match_scratch:SI 2 ""))
! (clobber (reg:CC 17))])]
""
! "")
! ;; ffs when TARGET_CMOVE is set.  Output as "#" and split after reload into:
! ;;   1. scratch = -1
! ;;   2. flags = compare (operand 1, 0); operand 0 = ctz (operand 1)
! ;;      (the form matched by *ffssi_1, i.e. a bsf)
! ;;   3. operand 0 = (operand 1 was zero) ? scratch : operand 0
! ;;   4. operand 0 += 1
! ;; so a zero input gives -1 + 1 = 0 and any other input gives ctz + 1.
! (define_insn_and_split "*ffs_cmove"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
! (clobber (match_scratch:SI 2 "=&r"))
! (clobber (reg:CC 17))]
! "TARGET_CMOVE"
! "#"
! "&& reload_completed"
! [(set (match_dup 2) (const_int -1))
! (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
! (set (match_dup 0) (ctz:SI (match_dup 1)))])
! (set (match_dup 0) (if_then_else:SI
! (eq (reg:CCZ 17) (const_int 0))
! (match_dup 2)
! (match_dup 0)))
! (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
! (clobber (reg:CC 17))])]
! "")
! ;; ffs without relying on cmove.  Output as "#" and split after reload into:
! ;;   1. scratch = 0
! ;;   2. flags = compare (operand 1, 0); operand 0 = ctz (operand 1)
! ;;      (the form matched by *ffssi_1, i.e. a bsf)
! ;;   3. low byte of scratch = (operand 1 was zero)      -- 0 or 1
! ;;   4. scratch = -scratch                              -- 0 or -1
! ;;   5. operand 0 |= scratch
! ;;   6. operand 0 += 1
! ;; so a zero input gives -1 + 1 = 0 and any other input gives ctz + 1.
! ;;
! ;; The scratch uses the "q" constraint rather than "r": step 3 stores into
! ;; its QImode low part through strict_low_part, and in 32-bit mode only the
! ;; a, b, c and d registers have an addressable low byte.
! (define_insn_and_split "*ffs_no_cmove"
! [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
! (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
! (clobber (match_scratch:SI 2 "=&q"))
! (clobber (reg:CC 17))]
! ""
! "#"
! "reload_completed"
! [(parallel [(set (match_dup 2) (const_int 0))
! (clobber (reg:CC 17))])
! (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
! (set (match_dup 0) (ctz:SI (match_dup 1)))])
! (set (strict_low_part (match_dup 3))
! (eq:QI (reg:CCZ 17) (const_int 0)))
! (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
! (clobber (reg:CC 17))])
! (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
! (clobber (reg:CC 17))])
! (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
! (clobber (reg:CC 17))])]
! {
! /* QImode view of the scratch, used as the destination of the flag store.  */
! operands[3] = gen_lowpart (QImode, operands[2]);
})
! ;; bsf with its flags result exposed: reg 17 is set in CCZ mode from
! ;; comparing operand 1 with zero, and operand 0 is set to ctz (operand 1).
! ;; The *ffs_cmove and *ffs_no_cmove splits generate RTL of this form so the
! ;; following insn can test whether the input was zero.
! (define_insn "*ffssi_1"
[(set (reg:CCZ 17)
! (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r")
! (ctz:SI (match_dup 1)))]
""
"bsf{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1")
(set_attr "ppro_uops" "few")])
! ;; Count trailing zeros of operand 1 into operand 0 with a single bsf.
! ;; Unlike *ffssi_1, the condition-code register is only clobbered here; its
! ;; value is not made available to later insns.
! (define_insn "ctzsi2"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
! (clobber (reg:CC 17))]
! ""
! "bsf{l}\t{%1, %0|%0, %1}"
! [(set_attr "prefix_0f" "1")
! (set_attr "ppro_uops" "few")])
!
! ;; Count leading zeros, expanded as two insns:
! ;;   1. operand 0 = 31 - clz (operand 1)   (the form matched by *bsr)
! ;;   2. operand 0 = operand 0 ^ 31
! ;; For any value x in 0..31, x ^ 31 equals 31 - x, so the xor turns the
! ;; result of step 1 back into clz (operand 1).
! (define_expand "clzsi2"
! [(parallel
! [(set (match_operand:SI 0 "register_operand" "")
! (minus:SI (const_int 31)
! (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
! (clobber (reg:CC 17))])
! (parallel
! [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
! (clobber (reg:CC 17))])]
! ""
! "")
!
! ;; bsr, described as 31 - clz (operand 1), which for an SImode value is the
! ;; bit index of its most significant set bit.  The condition-code register
! ;; is clobbered.  The first insn of the clzsi2 expansion has this form.
! (define_insn "*bsr"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (minus:SI (const_int 31)
! (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
! (clobber (reg:CC 17))]
! ""
! "bsr{l}\t{%1, %0|%0, %1}"
! [(set_attr "prefix_0f" "1")
! (set_attr "ppro_uops" "few")])
;; Thread-local storage patterns for ELF.
;;