x86_64 merger part 24 - string operations
Jan Hubicka
hubicka@atrey.karlin.mff.cuni.cz
Sat Mar 24 16:48:00 GMT 2001
Hi
This patch adds the string patterns. The code produced by these should be
comparable with the 32bit one, except for the movstrlensi_unroll_1 case, which should be
updated to work on 64bit values, but should be OK for first pass.
Ne bře 25 00:39:39 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.md (movstrsi): Move offline.
(movstrdi): New.
(strmovdi_rex64): New.
(strmov?i): Accept 64bit.
(strmov?i_rex64): New.
(strmov?i_rex_1): New.
(strmov?i_1): Disable for 64bit.
(rep_mov?i_rex64): New.
(rep_mov?i): Disable for 64bit.
(clrstrsi): Move offline.
(strset?i_rex64): New.
(strset?i): Accept 64bit.
(rep_stos?i): Disable for 64bit.
(rep_stos?i_rex64): New.
(strset?i_rex_1): New.
(strset?i_1): Disable for 64bit.
(cmpstrsi): Accept 64bit.
(cmpstrsi_nz_1): Rename to cmpstrqi_nz_1; Disable for 64bit.
(cmpstrqi_nz_rex_1): New.
(cmpstrsi_1): Rename to cmpstrqi_1; Disable for 64bit.
(strlensi): Move offline.
(strlendi): New.
(strlenqi_1): Disable for 64bit; fix constraints.
(strlenqi_rex_1): New.
* i386.c (ix86_adjust_counter): New static function.
(ix86_zero_extend_to_Pmode): Likewise.
(ix86_expand_aligntest): Likewise.
(ix86_expand_strlensi_unroll_1): Make static; update for 64bit.
(ix86_expand_movstr): New global function.
(ix86_expand_clrstr): New global function.
(ix86_expand_strlen): New global function.
* i386-protos.h (ix86_expand_movstr, ix86_expand_clrstr,
ix86_expand_strlen): Declare.
(ix86_expand_strlensi_unroll_1): Delete.
*** i386.md Thu Mar 22 21:45:30 2001
--- /p1/new/x86-64/gcc/gcc/config/i386/i386.md Sun Mar 25 00:35:28 2001
***************
*** 12021,12215 ****
(use (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "nonmemory_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))]
! ""
"
{
! rtx srcreg, destreg, countreg;
! int align = 0;
! int count = -1;
! rtx insns;
!
! start_sequence ();
!
! if (GET_CODE (operands[3]) == CONST_INT)
! align = INTVAL (operands[3]);
!
! /* This simple hack avoids all inlining code and simplifies code bellow. */
! if (!TARGET_ALIGN_STRINGOPS)
! align = 32;
!
! if (GET_CODE (operands[2]) == CONST_INT)
! count = INTVAL (operands[2]);
!
! destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
! srcreg = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
!
! emit_insn (gen_cld ());
!
! /* When optimizing for size emit simple rep ; movsb instruction for
! counts not divisible by 4. */
! if ((!optimize || optimize_size)
! && (count < 0 || (count & 0x03)))
! {
! countreg = copy_to_mode_reg (SImode, operands[2]);
! emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! }
! /* For constant aligned (or small unaligned) copies use rep movsl
! followed by code copying the rest. For PentiumPro ensure 8 byte
! alignment to allow rep movsl acceleration. */
! else if (count >= 0
! && (align >= 8
! || (!TARGET_PENTIUMPRO && align >= 4)
! || optimize_size || count < 64))
! {
! if (count & ~0x03)
! {
! countreg = copy_to_mode_reg (SImode,
! GEN_INT ((count >> 2)
! & 0x3fffffff));
! emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! }
! if (count & 0x02)
! emit_insn (gen_strmovhi (destreg, srcreg));
! if (count & 0x01)
! emit_insn (gen_strmovqi (destreg, srcreg));
! }
! /* The generic code based on the glibc implementation:
! - align destination to 4 bytes (8 byte alignment is used for PentiumPro
! allowing accelerated copying there)
! - copy the data using rep movsl
! - copy the rest. */
! else
{
! rtx countreg2;
! rtx label = NULL;
!
! /* In case we don't know anything about the alignment, default to
! library version, since it is usually equally fast and result in
! shorter code. */
! if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
! {
! end_sequence ();
! FAIL;
! }
!
! if (TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
!
! countreg2 = gen_reg_rtx (SImode);
! countreg = copy_to_mode_reg (SImode, operands[2]);
!
! /* We don't use loops to align destination and to copy parts smaller
! than 4 bytes, because gcc is able to optimize such code better (in
! the case the destination or the count really is aligned, gcc is often
! able to predict the branches) and also it is friendlier to the
! hardware branch prediction.
!
! Using loops is benefical for generic case, because we can
! handle small counts using the loops. Many CPUs (such as Athlon)
! have large REP prefix setup costs.
!
! This is quite costy. Maybe we can revisit this decision later or
! add some customizability to this code. */
!
! if (count < 0
! && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
! {
! label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (3),
! LEU, 0, SImode, 1, 0, label);
! }
! if (align <= 1)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strmovqi (destreg, srcreg));
! emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 2)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strmovhi (destreg, srcreg));
! emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strmovsi (destreg, srcreg));
! emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
!
! if (!TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld());
! emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
! emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
! destreg, srcreg, countreg2));
!
! if (label)
! {
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 2 && count > 0 && (count & 2))
! emit_insn (gen_strmovhi (destreg, srcreg));
! if (align <= 2 || count < 0)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strmovhi (destreg, srcreg));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 1 && count > 0 && (count & 1))
! emit_insn (gen_strmovsi (destreg, srcreg));
! if (align <= 1 || count < 0)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strmovqi (destreg, srcreg));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
}
!
! insns = get_insns ();
! end_sequence ();
!
! ix86_set_move_mem_attrs (insns, operands[0], operands[1], destreg, srcreg);
! emit_insns (insns);
! DONE;
}")
- ;; Most CPUs don't like single string operations
- ;; Handle this case here to simplify previous expander.
(define_expand "strmovsi"
[(set (match_dup 2)
--- 14261,14314 ----
(use (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "nonmemory_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))]
! "TARGET_64BIT"
"
{
! if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3]))
! DONE;
! else
! FAIL;
! }")
! (define_expand "movstrdi"
! [(use (match_operand:BLK 0 "memory_operand" ""))
! (use (match_operand:BLK 1 "memory_operand" ""))
! (use (match_operand:DI 2 "nonmemory_operand" ""))
! (use (match_operand:DI 3 "const_int_operand" ""))]
! "TARGET_64BIT"
! "
! {
! if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3]))
! DONE;
! else
! FAIL;
! }")
! ;; Most CPUs don't like single string operations
! ;; Handle this case here to simplify previous expander.
! (define_expand "strmovdi_rex64"
! [(set (match_dup 2)
! (mem:DI (match_operand:DI 1 "register_operand" "")))
! (set (mem:DI (match_operand:DI 0 "register_operand" ""))
! (match_dup 2))
! (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
! (clobber (reg:CC 17))])
! (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8)))
! (clobber (reg:CC 17))])]
! "TARGET_64BIT"
! "
! {
! if (TARGET_SINGLE_STRINGOP || optimize_size)
{
! emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0],
! operands[1]));
! DONE;
}
! else
! operands[2] = gen_reg_rtx (DImode);
}")
(define_expand "strmovsi"
[(set (match_dup 2)
***************
*** 12220,12228 ****
(clobber (reg:CC 17))])
(parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0],
--- 14319,14332 ----
(clobber (reg:CC 17))])
(parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strmovsi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0],
***************
*** 12233,12238 ****
--- 14337,14364 ----
operands[2] = gen_reg_rtx (SImode);
}")
+ (define_expand "strmovsi_rex64"
+ [(set (match_dup 2)
+ (mem:SI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:SI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+ {
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (SImode);
+ }")
+
(define_expand "strmovhi"
[(set (match_dup 2)
(mem:HI (match_operand:SI 1 "register_operand" "")))
***************
*** 12240,12254 ****
(match_dup 2))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
(clobber (reg:CC 17))])
! (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2)))
(clobber (reg:CC 17))])]
! ""
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
! emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0],
! operands[1]));
DONE;
}
else
--- 14366,14407 ----
(match_dup 2))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
(clobber (reg:CC 17))])
! (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2)))
! (clobber (reg:CC 17))])]
! ""
! "
! {
! if (TARGET_64BIT)
! {
! emit_insn (gen_strmovhi_rex64 (operands[0], operands[1]));
! DONE;
! }
! if (TARGET_SINGLE_STRINGOP || optimize_size)
! {
! emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0],
! operands[1]));
! DONE;
! }
! else
! operands[2] = gen_reg_rtx (HImode);
! }")
!
! (define_expand "strmovhi_rex64"
! [(set (match_dup 2)
! (mem:HI (match_operand:DI 1 "register_operand" "")))
! (set (mem:HI (match_operand:DI 0 "register_operand" ""))
! (match_dup 2))
! (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
! (clobber (reg:CC 17))])
! (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2)))
(clobber (reg:CC 17))])]
! "TARGET_64BIT"
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
! emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0],
! operands[1]));
DONE;
}
else
***************
*** 12264,12272 ****
(clobber (reg:CC 17))])
(parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1)))
(clobber (reg:CC 17))])]
! ""
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0],
--- 14417,14430 ----
(clobber (reg:CC 17))])
(parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1)))
(clobber (reg:CC 17))])]
! ""
"
{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strmovqi_rex64 (operands[0], operands[1]));
+ DONE;
+ }
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0],
***************
*** 12277,12282 ****
--- 14435,14478 ----
operands[2] = gen_reg_rtx (QImode);
}")
+ (define_expand "strmovqi_rex64"
+ [(set (match_dup 2)
+ (mem:QI (match_operand:DI 1 "register_operand" "")))
+ (set (mem:QI (match_operand:DI 0 "register_operand" ""))
+ (match_dup 2))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC 17))])
+ (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1)))
+ (clobber (reg:CC 17))])]
+ "!TARGET_64BIT"
+ "
+ {
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0],
+ operands[1]));
+ DONE;
+ }
+ else
+ operands[2] = gen_reg_rtx (QImode);
+ }")
+
+ (define_insn "strmovdi_rex_1"
+ [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+ (mem:DI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 8)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "movsq"
+ [(set_attr "type" "str")
+ (set_attr "mode" "DI")
+ (set_attr "memory" "both")])
+
(define_insn "strmovsi_1"
[(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
(mem:SI (match_operand:SI 3 "register_operand" "1")))
***************
*** 12287,12294 ****
(plus:SI (match_dup 3)
(const_int 4)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
! "movsl"
[(set_attr "type" "str")
(set_attr "mode" "SI")
(set_attr "memory" "both")])
--- 14483,14506 ----
(plus:SI (match_dup 3)
(const_int 4)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "movsl|movsd"
! [(set_attr "type" "str")
! (set_attr "mode" "SI")
! (set_attr "memory" "both")])
!
! (define_insn "strmovsi_rex_1"
! [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
! (mem:SI (match_operand:DI 3 "register_operand" "1")))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_dup 2)
! (const_int 4)))
! (set (match_operand:DI 1 "register_operand" "=S")
! (plus:DI (match_dup 3)
! (const_int 4)))
! (use (reg:SI 19))]
! "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "movsl|movsd"
[(set_attr "type" "str")
(set_attr "mode" "SI")
(set_attr "memory" "both")])
***************
*** 12303,12309 ****
(plus:SI (match_dup 3)
(const_int 2)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
"movsw"
[(set_attr "type" "str")
(set_attr "memory" "both")
--- 14515,14537 ----
(plus:SI (match_dup 3)
(const_int 2)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "movsw"
! [(set_attr "type" "str")
! (set_attr "memory" "both")
! (set_attr "mode" "HI")])
!
! (define_insn "strmovhi_rex_1"
! [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
! (mem:HI (match_operand:DI 3 "register_operand" "1")))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_dup 2)
! (const_int 2)))
! (set (match_operand:DI 1 "register_operand" "=S")
! (plus:DI (match_dup 3)
! (const_int 2)))
! (use (reg:SI 19))]
! "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"movsw"
[(set_attr "type" "str")
(set_attr "memory" "both")
***************
*** 12319,12330 ****
(plus:SI (match_dup 3)
(const_int 1)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
"movsb"
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "QI")])
(define_insn "rep_movsi"
[(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
--- 14547,14594 ----
(plus:SI (match_dup 3)
(const_int 1)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "movsb"
! [(set_attr "type" "str")
! (set_attr "memory" "both")
! (set_attr "mode" "QI")])
!
! (define_insn "strmovqi_rex_1"
! [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
! (mem:QI (match_operand:DI 3 "register_operand" "1")))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_dup 2)
! (const_int 1)))
! (set (match_operand:DI 1 "register_operand" "=S")
! (plus:DI (match_dup 3)
! (const_int 1)))
! (use (reg:SI 19))]
! "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"movsb"
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "QI")])
+ (define_insn "rep_movdi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
+ "rep\;movsq|rep movsq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
(define_insn "rep_movsi"
[(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
***************
*** 12338,12344 ****
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
! ""
"rep\;movsl|rep movsd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
--- 14602,14628 ----
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
! "!TARGET_64BIT"
! "rep\;movsl|rep movsd"
! [(set_attr "type" "str")
! (set_attr "prefix_rep" "1")
! (set_attr "memory" "both")
! (set_attr "mode" "SI")])
!
! (define_insn "rep_movsi_rex64"
! [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
! (const_int 2))
! (match_operand:DI 3 "register_operand" "0")))
! (set (match_operand:DI 1 "register_operand" "=S")
! (plus:DI (ashift:DI (match_dup 5) (const_int 2))
! (match_operand:DI 4 "register_operand" "1")))
! (set (mem:BLK (match_dup 3))
! (mem:BLK (match_dup 4)))
! (use (match_dup 5))
! (use (reg:SI 19))]
! "TARGET_64BIT"
"rep\;movsl|rep movsd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
***************
*** 12356,12362 ****
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
! ""
"rep\;movsb|rep movsb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
--- 14640,14664 ----
(mem:BLK (match_dup 4)))
(use (match_dup 5))
(use (reg:SI 19))]
! "!TARGET_64BIT"
! "rep\;movsb|rep movsb"
! [(set_attr "type" "str")
! (set_attr "prefix_rep" "1")
! (set_attr "memory" "both")
! (set_attr "mode" "SI")])
!
! (define_insn "rep_movqi_rex64"
! [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_operand:DI 3 "register_operand" "0")
! (match_operand:DI 5 "register_operand" "2")))
! (set (match_operand:DI 1 "register_operand" "=S")
! (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
! (set (mem:BLK (match_dup 3))
! (mem:BLK (match_dup 4)))
! (use (match_dup 5))
! (use (reg:SI 19))]
! "TARGET_64BIT"
"rep\;movsb|rep movsb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
***************
*** 12366,12597 ****
(define_expand "clrstrsi"
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:SI 1 "nonmemory_operand" ""))
! (use (match_operand:SI 2 "const_int_operand" ""))]
""
"
{
! /* See comments in movstr expanders. The code is mostly identical. */
!
! rtx destreg, zeroreg, countreg;
! int align = 0;
! int count = -1;
! rtx insns;
!
! start_sequence ();
!
! if (GET_CODE (operands[2]) == CONST_INT)
! align = INTVAL (operands[2]);
!
! /* This simple hack avoids all inlining code and simplifies code bellow. */
! if (!TARGET_ALIGN_STRINGOPS)
! align = 32;
!
! if (GET_CODE (operands[1]) == CONST_INT)
! count = INTVAL (operands[1]);
!
! destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
! emit_insn (gen_cld ());
! /* When optimizing for size emit simple rep ; movsb instruction for
! counts not divisible by 4. */
! if ((!optimize || optimize_size)
! && (count < 0 || (count & 0x03)))
! {
! countreg = copy_to_mode_reg (SImode, operands[1]);
! zeroreg = copy_to_mode_reg (QImode, const0_rtx);
! emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
! destreg, countreg));
! }
! else if (count >= 0
! && (align >= 8
! || (!TARGET_PENTIUMPRO && align >= 4)
! || optimize_size || count < 64))
! {
! zeroreg = copy_to_mode_reg (SImode, const0_rtx);
! if (INTVAL (operands[1]) & ~0x03)
! {
! countreg = copy_to_mode_reg (SImode,
! GEN_INT ((INTVAL (operands[1]) >> 2)
! & 0x3fffffff));
! emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
! destreg, countreg));
! }
! if (INTVAL (operands[1]) & 0x02)
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! if (INTVAL (operands[1]) & 0x01)
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! }
! else
{
! rtx countreg2;
! rtx label = NULL;
!
! /* In case we don't know anything about the alignment, default to
! library version, since it is usually equally fast and result in
! shorter code. */
! if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
! {
! end_sequence ();
! FAIL;
! }
!
! if (TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
!
! countreg2 = gen_reg_rtx (SImode);
! countreg = copy_to_mode_reg (SImode, operands[1]);
! zeroreg = copy_to_mode_reg (SImode, const0_rtx);
!
! if (count < 0
! && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
! {
! label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (3),
! LEU, 0, SImode, 1, 0, label);
! }
! if (align <= 1)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 2)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strsetsi (destreg, zeroreg));
! emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
!
! if (!TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld());
! emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
! emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
! destreg, countreg2));
!
! if (label)
! {
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 2 && count > 0 && (count & 2))
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! if (align <= 2 || count < 0)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 1 && count > 0 && (count & 1))
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! if (align <= 1 || count < 0)
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (SImode);
! emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
! emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
! SImode, 1, 0, label);
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
}
-
- insns = get_insns ();
- end_sequence ();
-
- ix86_set_move_mem_attrs (insns, operands[0], operands[0], destreg, destreg);
- emit_insns (insns);
-
- DONE;
}")
- ;; Most CPUs don't like single string operations
- ;; Handle this case here to simplify previous expander.
-
(define_expand "strsetsi"
[(set (mem:SI (match_operand:SI 0 "register_operand" ""))
(match_operand:SI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
! if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
(define_expand "strsethi"
[(set (mem:HI (match_operand:SI 0 "register_operand" ""))
(match_operand:HI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
(clobber (reg:CC 17))])]
""
"
{
! if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
(define_expand "strsetqi"
[(set (mem:QI (match_operand:SI 0 "register_operand" ""))
(match_operand:QI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
(clobber (reg:CC 17))])]
""
"
{
! if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
(define_insn "strsetsi_1"
[(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
(match_operand:SI 2 "register_operand" "a"))
--- 14668,14832 ----
(define_expand "clrstrsi"
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:SI 1 "nonmemory_operand" ""))
! (use (match_operand 2 "const_int_operand" ""))]
""
"
{
! if (ix86_expand_clrstr (operands[0], operands[1], operands[2]))
! DONE;
! else
! FAIL;
! }")
! (define_expand "clrstrdi"
! [(use (match_operand:BLK 0 "memory_operand" ""))
! (use (match_operand:DI 1 "nonmemory_operand" ""))
! (use (match_operand 2 "const_int_operand" ""))]
! "TARGET_64BIT"
! "
! {
! if (ix86_expand_clrstr (operands[0], operands[1], operands[2]))
! DONE;
! else
! FAIL;
! }")
! ;; Most CPUs don't like single string operations
! ;; Handle this case here to simplify previous expander.
! (define_expand "strsetdi_rex64"
! [(set (mem:DI (match_operand:DI 0 "register_operand" ""))
! (match_operand:DI 1 "register_operand" ""))
! (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8)))
! (clobber (reg:CC 17))])]
! "TARGET_64BIT"
! "
! {
! if (TARGET_SINGLE_STRINGOP || optimize_size)
{
! emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1]));
! DONE;
}
}")
(define_expand "strsetsi"
[(set (mem:SI (match_operand:SI 0 "register_operand" ""))
(match_operand:SI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
! if (TARGET_64BIT)
! {
! emit_insn (gen_strsetsi_rex64 (operands[0], operands[1]));
! DONE;
! }
! else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+ (define_expand "strsetsi_rex64"
+ [(set (mem:SI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+ {
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
(define_expand "strsethi"
[(set (mem:HI (match_operand:SI 0 "register_operand" ""))
(match_operand:HI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2)))
(clobber (reg:CC 17))])]
""
"
{
! if (TARGET_64BIT)
! {
! emit_insn (gen_strsethi_rex64 (operands[0], operands[1]));
! DONE;
! }
! else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+ (define_expand "strsethi_rex64"
+ [(set (mem:HI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+ {
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
(define_expand "strsetqi"
[(set (mem:QI (match_operand:SI 0 "register_operand" ""))
(match_operand:QI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
(clobber (reg:CC 17))])]
"!TARGET_64BIT"
"
{
! if (TARGET_64BIT)
! {
! emit_insn (gen_strsetqi_rex64 (operands[0], operands[1]));
! DONE;
! }
! else if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
+ (define_expand "strsetqi_rex64"
+ [(set (mem:QI (match_operand:DI 0 "register_operand" ""))
+ (match_operand:QI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC 17))])]
+ "TARGET_64BIT"
+ "
+ {
+ if (TARGET_SINGLE_STRINGOP || optimize_size)
+ {
+ emit_insn (gen_strsetqi_rex_1 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
+ (define_insn "strsetdi_rex_1"
+ [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 8)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
(define_insn "strsetsi_1"
[(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
(match_operand:SI 2 "register_operand" "a"))
***************
*** 12599,12606 ****
(plus:SI (match_dup 1)
(const_int 4)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
! "stosl"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "SI")])
--- 14834,14854 ----
(plus:SI (match_dup 1)
(const_int 4)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "stosl|stosd"
! [(set_attr "type" "str")
! (set_attr "memory" "store")
! (set_attr "mode" "SI")])
!
! (define_insn "strsetsi_rex_1"
! [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
! (match_operand:SI 2 "register_operand" "a"))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_dup 1)
! (const_int 4)))
! (use (reg:SI 19))]
! "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "stosl|stosd"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "SI")])
***************
*** 12612,12618 ****
(plus:SI (match_dup 1)
(const_int 2)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
"stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
--- 14860,14879 ----
(plus:SI (match_dup 1)
(const_int 2)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
! "stosw"
! [(set_attr "type" "str")
! (set_attr "memory" "store")
! (set_attr "mode" "HI")])
!
! (define_insn "strsethi_rex_1"
! [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
! (match_operand:HI 2 "register_operand" "a"))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_dup 1)
! (const_int 2)))
! (use (reg:SI 19))]
! "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
***************
*** 12625,12636 ****
(plus:SI (match_dup 1)
(const_int 1)))
(use (reg:SI 19))]
! "TARGET_SINGLE_STRINGOP || optimize_size"
"stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "QI")])
(define_insn "rep_stossi"
[(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
--- 14886,14928 ----
(plus:SI (match_dup 1)
(const_int 1)))
(use (reg:SI 19))]
! "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
"stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
(set_attr "mode" "QI")])
+ (define_insn "strsetqi_rex_1"
+ [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 1)))
+ (use (reg:SI 19))]
+ "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+ (define_insn "rep_stosdi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:DI 2 "register_operand" "a"))
+ (use (match_dup 4))
+ (use (reg:SI 19))]
+ "TARGET_64BIT"
+ "rep\;stosq|rep stosq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
(define_insn "rep_stossi"
[(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
(set (match_operand:SI 0 "register_operand" "=D")
***************
*** 12642,12648 ****
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
! ""
"rep\;stosl|rep stosd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
--- 14934,14958 ----
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
! "!TARGET_64BIT"
! "rep\;stosl|rep stosd"
! [(set_attr "type" "str")
! (set_attr "prefix_rep" "1")
! (set_attr "memory" "store")
! (set_attr "mode" "SI")])
!
! (define_insn "rep_stossi_rex64"
! [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
! (const_int 2))
! (match_operand:DI 3 "register_operand" "0")))
! (set (mem:BLK (match_dup 3))
! (const_int 0))
! (use (match_operand:SI 2 "register_operand" "a"))
! (use (match_dup 4))
! (use (reg:SI 19))]
! "TARGET_64BIT"
"rep\;stosl|rep stosd"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
***************
*** 12659,12665 ****
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
! ""
"rep\;stosb|rep stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
--- 14969,14992 ----
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))
(use (reg:SI 19))]
! "!TARGET_64BIT"
! "rep\;stosb|rep stosb"
! [(set_attr "type" "str")
! (set_attr "prefix_rep" "1")
! (set_attr "memory" "store")
! (set_attr "mode" "QI")])
!
! (define_insn "rep_stosqi_rex64"
! [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
! (set (match_operand:DI 0 "register_operand" "=D")
! (plus:DI (match_operand:DI 3 "register_operand" "0")
! (match_operand:DI 4 "register_operand" "1")))
! (set (mem:BLK (match_dup 3))
! (const_int 0))
! (use (match_operand:QI 2 "register_operand" "a"))
! (use (match_dup 4))
! (use (reg:DI 19))]
! "TARGET_64BIT"
"rep\;stosb|rep stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
***************
*** 12670,12677 ****
[(set (match_operand:SI 0 "register_operand" "")
(compare:SI (match_operand:BLK 1 "general_operand" "")
(match_operand:BLK 2 "general_operand" "")))
! (use (match_operand:SI 3 "general_operand" ""))
! (use (match_operand:SI 4 "immediate_operand" ""))]
""
"
{
--- 14997,15004 ----
[(set (match_operand:SI 0 "register_operand" "")
(compare:SI (match_operand:BLK 1 "general_operand" "")
(match_operand:BLK 2 "general_operand" "")))
! (use (match_operand 3 "general_operand" ""))
! (use (match_operand 4 "immediate_operand" ""))]
""
"
{
***************
*** 12685,12691 ****
addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
count = operands[3];
! countreg = copy_to_mode_reg (SImode, count);
/* %%% Iff we are testing strict equality, we can use known alignment
to good advantage. This may be possible with combine, particularly
--- 15012,15018 ----
addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
count = operands[3];
! countreg = copy_to_mode_reg (Pmode, count);
/* %%% Iff we are testing strict equality, we can use known alignment
to good advantage. This may be possible with combine, particularly
***************
*** 12700,12713 ****
emit_move_insn (operands[0], const0_rtx);
DONE;
}
! emit_insn (gen_cmpstrsi_nz_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
}
else
{
! emit_insn (gen_cmpsi_1 (countreg, countreg));
! emit_insn (gen_cmpstrsi_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
}
outlow = gen_lowpart (QImode, out);
--- 15027,15053 ----
emit_move_insn (operands[0], const0_rtx);
DONE;
}
! if (TARGET_64BIT)
! emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
! else
! emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
}
else
{
! if (TARGET_64BIT)
! {
! emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
! emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
! }
! else
! {
! emit_insn (gen_cmpsi_1 (countreg, countreg));
! emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align,
! addr1, addr2, countreg));
! }
}
outlow = gen_lowpart (QImode, out);
***************
*** 12738,12744 ****
;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
;; zero. Emit extra code to make sure that a zero-length compare is EQ.
! (define_insn "cmpstrsi_nz_1"
[(set (reg:CC 17)
(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
(mem:BLK (match_operand:SI 5 "register_operand" "1"))))
--- 15078,15084 ----
;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
;; zero. Emit extra code to make sure that a zero-length compare is EQ.
! (define_insn "cmpstrqi_nz_1"
[(set (reg:CC 17)
(compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
(mem:BLK (match_operand:SI 5 "register_operand" "1"))))
***************
*** 12748,12754 ****
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
! ""
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
--- 15088,15110 ----
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
! "!TARGET_64BIT"
! "repz{\;| }cmpsb"
! [(set_attr "type" "str")
! (set_attr "mode" "QI")
! (set_attr "prefix_rep" "1")])
!
! (define_insn "cmpstrqi_nz_rex_1"
! [(set (reg:CC 17)
! (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
! (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
! (use (match_operand:DI 6 "register_operand" "2"))
! (use (match_operand:SI 3 "immediate_operand" "i"))
! (use (reg:SI 19))
! (clobber (match_operand:DI 0 "register_operand" "=S"))
! (clobber (match_operand:DI 1 "register_operand" "=D"))
! (clobber (match_operand:DI 2 "register_operand" "=c"))]
! "TARGET_64BIT"
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
***************
*** 12756,12762 ****
;; The same, but the count is not known to not be zero.
! (define_insn "cmpstrsi_1"
[(set (reg:CC 17)
(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
(const_int 0))
--- 15112,15118 ----
;; The same, but the count is not known to not be zero.
! (define_insn "cmpstrqi_1"
[(set (reg:CC 17)
(if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
(const_int 0))
***************
*** 12769,12775 ****
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
! ""
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
--- 15125,15150 ----
(clobber (match_operand:SI 0 "register_operand" "=S"))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (match_operand:SI 2 "register_operand" "=c"))]
! "!TARGET_64BIT"
! "repz{\;| }cmpsb"
! [(set_attr "type" "str")
! (set_attr "mode" "QI")
! (set_attr "prefix_rep" "1")])
!
! (define_insn "cmpstrqi_rex_1"
! [(set (reg:CC 17)
! (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
! (const_int 0))
! (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
! (mem:BLK (match_operand:DI 5 "register_operand" "1")))
! (const_int 0)))
! (use (match_operand:SI 3 "immediate_operand" "i"))
! (use (reg:CC 17))
! (use (reg:SI 19))
! (clobber (match_operand:DI 0 "register_operand" "=S"))
! (clobber (match_operand:DI 1 "register_operand" "=D"))
! (clobber (match_operand:DI 2 "register_operand" "=c"))]
! "TARGET_64BIT"
"repz{\;| }cmpsb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
***************
*** 12779,12854 ****
[(set (match_operand:SI 0 "register_operand" "")
(unspec:SI [(match_operand:BLK 1 "general_operand" "")
(match_operand:QI 2 "immediate_operand" "")
! (match_operand:SI 3 "immediate_operand" "")] 0))]
""
"
{
! rtx out, addr, scratch1, scratch2, scratch3;
! rtx eoschar = operands[2];
! rtx align = operands[3];
!
! /* The generic case of strlen expander is long. Avoid it's
! expanding unless TARGET_INLINE_ALL_STRINGOPS. */
!
! if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
! && !TARGET_INLINE_ALL_STRINGOPS
! && !optimize_size
! && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
! FAIL;
!
! out = operands[0];
! addr = force_reg (Pmode, XEXP (operands[1], 0));
! scratch1 = gen_reg_rtx (SImode);
!
! if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
! && !optimize_size)
! {
! /* Well it seems that some optimizer does not combine a call like
! foo(strlen(bar), strlen(bar));
! when the move and the subtraction is done here. It does calculate
! the length just once when these instructions are done inside of
! output_strlen_unroll(). But I think since &bar[strlen(bar)] is
! often used and I use one fewer register for the lifetime of
! output_strlen_unroll() this is better. */
!
! if (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
! emit_move_insn (scratch1, addr);
!
! emit_move_insn (out, addr);
!
! ix86_expand_strlensi_unroll_1 (out, align, scratch1);
!
! /* strlensi_unroll_1 returns the address of the zero at the end of
! the string, like memchr(), so compute the length by subtracting
! the start address. */
! emit_insn (gen_subsi3 (out, out, addr));
! }
! else
! {
! scratch2 = gen_reg_rtx (SImode);
! scratch3 = gen_reg_rtx (SImode);
!
! emit_move_insn (scratch3, addr);
! emit_insn (gen_cld ());
! emit_insn (gen_strlensi_1 (scratch1, scratch3, eoschar,
! align, constm1_rtx, scratch3));
! emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
! emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
! }
! DONE;
}")
! (define_insn "strlensi_1"
[(set (match_operand:SI 0 "register_operand" "=&c")
(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
! (match_operand:QI 2 "general_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
! (match_operand:SI 4 "immediate_operand" "0")] 0))
(use (reg:SI 19))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (reg:CC 17))]
! ""
"repnz{\;| }scasb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
--- 15154,15208 ----
[(set (match_operand:SI 0 "register_operand" "")
(unspec:SI [(match_operand:BLK 1 "general_operand" "")
(match_operand:QI 2 "immediate_operand" "")
! (match_operand 3 "immediate_operand" "")] 0))]
""
"
{
! if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
! DONE;
! else
! FAIL;
! }")
! (define_expand "strlendi"
! [(set (match_operand:DI 0 "register_operand" "")
! (unspec:DI [(match_operand:BLK 1 "general_operand" "")
! (match_operand:QI 2 "immediate_operand" "")
! (match_operand 3 "immediate_operand" "")] 0))]
! ""
! "
! {
! if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
! DONE;
! else
! FAIL;
}")
! (define_insn "strlenqi_1"
[(set (match_operand:SI 0 "register_operand" "=&c")
(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
! (match_operand:QI 2 "register_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
! (match_operand:SI 4 "register_operand" "0")] 0))
(use (reg:SI 19))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (reg:CC 17))]
! "!TARGET_64BIT"
! "repnz{\;| }scasb"
! [(set_attr "type" "str")
! (set_attr "mode" "QI")
! (set_attr "prefix_rep" "1")])
!
! (define_insn "strlenqi_rex_1"
! [(set (match_operand:DI 0 "register_operand" "=&c")
! (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
! (match_operand:QI 2 "register_operand" "a")
! (match_operand:DI 3 "immediate_operand" "i")
! (match_operand:DI 4 "register_operand" "0")] 0))
! (use (reg:SI 19))
! (clobber (match_operand:DI 1 "register_operand" "=D"))
! (clobber (reg:CC 17))]
! "TARGET_64BIT"
"repnz{\;| }scasb"
[(set_attr "type" "str")
(set_attr "mode" "QI")
*** i386.c Thu Mar 22 20:29:02 2001
--- /p1/new/x86-64/gcc/gcc/config/i386/i386.c Sun Mar 25 00:37:22 2001
*************** static void ix86_set_move_mem_attrs_1 PA
*** 566,571 ****
--- 578,587 ----
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
+ static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
+ static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
+ static rtx ix86_expand_aligntest PARAMS ((rtx, int));
+ static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
struct ix86_address
{
*************** ix86_split_lshrdi (operands, scratch)
*** 6934,6984 ****
rtx low[2], high[2];
int count;
! if (GET_CODE (operands[2]) == CONST_INT)
{
! split_di (operands, 2, low, high);
! count = INTVAL (operands[2]) & 63;
! if (count >= 32)
! {
! emit_move_insn (low[0], high[1]);
! emit_move_insn (high[0], const0_rtx);
! if (count > 32)
! emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
! }
else
! {
! if (!rtx_equal_p (operands[0], operands[1]))
! emit_move_insn (operands[0], operands[1]);
! emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
! emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
! }
}
else
{
! if (!rtx_equal_p (operands[0], operands[1]))
! emit_move_insn (operands[0], operands[1]);
!
! split_di (operands, 1, low, high);
! emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
! emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
! /* Heh. By reversing the arguments, we can reuse this pattern. */
! if (TARGET_CMOVE && (! no_new_pseudos || scratch))
{
! if (! no_new_pseudos)
! scratch = force_reg (SImode, const0_rtx);
! else
! emit_move_insn (scratch, const0_rtx);
!
! emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
! scratch));
}
else
! emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
}
}
/* Expand the appropriate insns for doing strlen if not just doing
--- 8224,8810 ----
rtx low[2], high[2];
int count;
! if (GET_CODE (operands[2]) == CONST_INT)
! {
! split_di (operands, 2, low, high);
! count = INTVAL (operands[2]) & 63;
!
! if (count >= 32)
! {
! emit_move_insn (low[0], high[1]);
! emit_move_insn (high[0], const0_rtx);
!
! if (count > 32)
! emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
! }
! else
! {
! if (!rtx_equal_p (operands[0], operands[1]))
! emit_move_insn (operands[0], operands[1]);
! emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
! emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
! }
! }
! else
! {
! if (!rtx_equal_p (operands[0], operands[1]))
! emit_move_insn (operands[0], operands[1]);
!
! split_di (operands, 1, low, high);
!
! emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
! emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
!
! /* Heh. By reversing the arguments, we can reuse this pattern. */
! if (TARGET_CMOVE && (! no_new_pseudos || scratch))
! {
! if (! no_new_pseudos)
! scratch = force_reg (SImode, const0_rtx);
! else
! emit_move_insn (scratch, const0_rtx);
!
! emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
! scratch));
! }
! else
! emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
! }
! }
!
! /* Helper function for the string operations below.  Test whether VARIABLE
! is aligned to VALUE bytes.  If true, jump to the label. */
! static rtx
! ix86_expand_aligntest (variable, value)
! rtx variable;
! int value;
! {
! rtx label = gen_label_rtx ();
! rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
! if (GET_MODE (variable) == DImode)
! emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
! else
! emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
! emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
! 1, 0, label);
! return label;
! }
!
! /* Adjust COUNTER by the VALUE. */
! static void
! ix86_adjust_counter (countreg, value)
! rtx countreg;
! HOST_WIDE_INT value;
! {
! if (GET_MODE (countreg) == DImode)
! emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
! else
! emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
! }
!
! /* Zero extend possibly SImode EXP to Pmode register. */
! static rtx
! ix86_zero_extend_to_Pmode (exp)
! rtx exp;
! {
! rtx r;
! if (GET_MODE (exp) == VOIDmode)
! return force_reg (Pmode, exp);
! if (GET_MODE (exp) == Pmode)
! return copy_to_mode_reg (Pmode, exp);
! r = gen_reg_rtx (Pmode);
! emit_insn (gen_zero_extendsidi2 (r, exp));
! return r;
! }
!
! /* Expand string move (memcpy) operation. Use i386 string operations when
! profitable. expand_clrstr contains similar code. */
! int
! ix86_expand_movstr (dst, src, count_exp, align_exp)
! rtx dst, src, count_exp, align_exp;
! {
! rtx srcreg, destreg, countreg;
! enum machine_mode counter_mode;
! HOST_WIDE_INT align = 0;
! unsigned HOST_WIDE_INT count = 0;
! rtx insns;
!
! start_sequence ();
!
! if (GET_CODE (align_exp) == CONST_INT)
! align = INTVAL (align_exp);
!
! /* This simple hack avoids all inlining code and simplifies code below. */
! if (!TARGET_ALIGN_STRINGOPS)
! align = 64;
!
! if (GET_CODE (count_exp) == CONST_INT)
! count = INTVAL (count_exp);
!
! /* Figure out proper mode for counter. For 32bits it is always SImode,
! for 64bits use SImode when possible, otherwise DImode.
! Set count to number of bytes copied when known at compile time. */
! if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
! || x86_64_zero_extended_value (count_exp))
! counter_mode = SImode;
! else
! counter_mode = DImode;
!
! if (counter_mode != SImode && counter_mode != DImode)
! abort ();
!
! destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
! srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
!
! emit_insn (gen_cld ());
!
! /* When optimizing for size emit simple rep ; movsb instruction for
! counts not divisible by 4. */
!
! if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
! {
! countreg = ix86_zero_extend_to_Pmode (count_exp);
! if (TARGET_64BIT)
! emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! else
! emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! }
!
! /* For constant aligned (or small unaligned) copies use rep movsl
! followed by code copying the rest. For PentiumPro ensure 8 byte
! alignment to allow rep movsl acceleration. */
!
! else if (count != 0
! && (align >= 8
! || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
! || optimize_size || count < (unsigned int)64))
! {
! int size = TARGET_64BIT && !optimize_size ? 8 : 4;
! if (count & ~(size - 1))
! {
! countreg = copy_to_mode_reg (counter_mode,
! GEN_INT ((count >> (size == 4 ? 2 : 3))
! & (TARGET_64BIT ? -1 : 0x3fffffff)));
! countreg = ix86_zero_extend_to_Pmode (countreg);
! if (size == 4)
! {
! if (TARGET_64BIT)
! emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! else
! emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! }
! else
! emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
! destreg, srcreg, countreg));
! }
! if (size == 8 && (count & 0x04))
! emit_insn (gen_strmovsi (destreg, srcreg));
! if (count & 0x02)
! emit_insn (gen_strmovhi (destreg, srcreg));
! if (count & 0x01)
! emit_insn (gen_strmovqi (destreg, srcreg));
! }
! /* The generic code based on the glibc implementation:
! - align destination to 4 bytes (8 byte alignment is used for PentiumPro
! allowing accelerated copying there)
! - copy the data using rep movsl
! - copy the rest. */
! else
! {
! rtx countreg2;
! rtx label = NULL;
!
! /* In case we don't know anything about the alignment, default to
! library version, since it is usually equally fast and results in
! shorter code. */
! if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
! {
! end_sequence ();
! return 0;
! }
!
! if (TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
!
! countreg2 = gen_reg_rtx (Pmode);
! countreg = copy_to_mode_reg (counter_mode, count_exp);
!
! /* We don't use loops to align destination and to copy parts smaller
! than 4 bytes, because gcc is able to optimize such code better (in
! the case the destination or the count really is aligned, gcc is often
! able to predict the branches) and also it is friendlier to the
! hardware branch prediction.
!
! Using loops is beneficial for the generic case, because we can
! handle small counts using the loops. Many CPUs (such as Athlon)
! have large REP prefix setup costs.
!
! This is quite costly. Maybe we can revisit this decision later or
! add some customizability to this code. */
!
! if (count == 0
! && align < (TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int)260)
! ? 8 : UNITS_PER_WORD))
! {
! label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
! LEU, 0, counter_mode, 1, 0, label);
! }
! if (align <= 1)
! {
! rtx label = ix86_expand_aligntest (destreg, 1);
! emit_insn (gen_strmovqi (destreg, srcreg));
! ix86_adjust_counter (countreg, 1);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 2)
! {
! rtx label = ix86_expand_aligntest (destreg, 2);
! emit_insn (gen_strmovhi (destreg, srcreg));
! ix86_adjust_counter (countreg, 2);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 4
! && ((TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int)260))
! || TARGET_64BIT))
! {
! rtx label = ix86_expand_aligntest (destreg, 4);
! emit_insn (gen_strmovsi (destreg, srcreg));
! ix86_adjust_counter (countreg, 4);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
!
! if (!TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
! if (TARGET_64BIT)
! {
! emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
! GEN_INT (3)));
! emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
! destreg, srcreg, countreg2));
! }
! else
! {
! emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
! emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
! destreg, srcreg, countreg2));
! }
!
! if (label)
! {
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
! emit_insn (gen_strmovsi (destreg, srcreg));
! if ((align <= 4 || count == 0) && TARGET_64BIT)
! {
! rtx label = ix86_expand_aligntest (countreg, 4);
! emit_insn (gen_strmovsi (destreg, srcreg));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 2 && count != 0 && (count & 2))
! emit_insn (gen_strmovhi (destreg, srcreg));
! if (align <= 2 || count == 0)
! {
! rtx label = ix86_expand_aligntest (countreg, 2);
! emit_insn (gen_strmovhi (destreg, srcreg));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 1 && count != 0 && (count & 1))
! emit_insn (gen_strmovqi (destreg, srcreg));
! if (align <= 1 || count == 0)
! {
! rtx label = ix86_expand_aligntest (countreg, 1);
! emit_insn (gen_strmovqi (destreg, srcreg));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! }
!
! insns = get_insns ();
! end_sequence ();
!
! ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
! emit_insns (insns);
! return 1;
! }
!
! /* Expand string clear operation (bzero). Use i386 string operations when
! profitable. expand_movstr contains similar code. */
! int
! ix86_expand_clrstr (src, count_exp, align_exp)
! rtx src, count_exp, align_exp;
! {
! rtx destreg, zeroreg, countreg;
! enum machine_mode counter_mode;
! HOST_WIDE_INT align = 0;
! unsigned HOST_WIDE_INT count = 0;
!
! if (GET_CODE (align_exp) == CONST_INT)
! align = INTVAL (align_exp);
!
! /* This simple hack avoids all inlining code and simplifies code below. */
! if (!TARGET_ALIGN_STRINGOPS)
! align = 32;
!
! if (GET_CODE (count_exp) == CONST_INT)
! count = INTVAL (count_exp);
! /* Figure out proper mode for counter. For 32bits it is always SImode,
! for 64bits use SImode when possible, otherwise DImode.
! Set count to number of bytes copied when known at compile time. */
! if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
! || x86_64_zero_extended_value (count_exp))
! counter_mode = SImode;
! else
! counter_mode = DImode;
!
! destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
!
! emit_insn (gen_cld ());
!
! /* When optimizing for size emit simple rep ; movsb instruction for
! counts not divisible by 4. */
!
! if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
! {
! countreg = ix86_zero_extend_to_Pmode (count_exp);
! zeroreg = copy_to_mode_reg (QImode, const0_rtx);
! if (TARGET_64BIT)
! emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
! destreg, countreg));
! else
! emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
! destreg, countreg));
! }
! else if (count != 0
! && (align >= 8
! || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
! || optimize_size || count < (unsigned int)64))
! {
! int size = TARGET_64BIT && !optimize_size ? 8 : 4;
! zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
! if (count & ~(size - 1))
! {
! countreg = copy_to_mode_reg (counter_mode,
! GEN_INT ((count >> (size == 4 ? 2 : 3))
! & (TARGET_64BIT ? -1 : 0x3fffffff)));
! countreg = ix86_zero_extend_to_Pmode (countreg);
! if (size == 4)
! {
! if (TARGET_64BIT)
! emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
! destreg, countreg));
! else
! emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
! destreg, countreg));
! }
! else
! emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
! destreg, countreg));
! }
! if (size == 8 && (count & 0x04))
! emit_insn (gen_strsetsi (destreg,
! gen_rtx_SUBREG (SImode, zeroreg, 0)));
! if (count & 0x02)
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! if (count & 0x01)
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! }
! else
! {
! rtx countreg2;
! rtx label = NULL;
!
! /* In case we don't know anything about the alignment, default to
! library version, since it is usually equally fast and results in
! shorter code. */
! if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
! return 0;
!
! if (TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
!
! countreg2 = gen_reg_rtx (Pmode);
! countreg = copy_to_mode_reg (counter_mode, count_exp);
! zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
!
! if (count == 0
! && align < (TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int)260)
! ? 8 : UNITS_PER_WORD))
! {
! label = gen_label_rtx ();
! emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
! LEU, 0, counter_mode, 1, 0, label);
! }
! if (align <= 1)
! {
! rtx label = ix86_expand_aligntest (destreg, 1);
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! ix86_adjust_counter (countreg, 1);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 2)
! {
! rtx label = ix86_expand_aligntest (destreg, 2);
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! ix86_adjust_counter (countreg, 2);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
! || count >= (unsigned int)260))
! {
! rtx label = ix86_expand_aligntest (destreg, 4);
! emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
! ? gen_rtx_SUBREG (SImode, zeroreg, 0)
! : zeroreg)));
! ix86_adjust_counter (countreg, 4);
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
!
! if (!TARGET_SINGLE_STRINGOP)
! emit_insn (gen_cld ());
! if (TARGET_64BIT)
! {
! emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
! GEN_INT (3)));
! emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
! destreg, countreg2));
! }
! else
! {
! emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
! emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
! destreg, countreg2));
! }
!
! if (label)
! {
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
! emit_insn (gen_strsetsi (destreg,
! gen_rtx_SUBREG (SImode, zeroreg, 0)));
! if (TARGET_64BIT && (align <= 4 || count == 0))
! {
! rtx label = ix86_expand_aligntest (destreg, 2);
! emit_insn (gen_strsetsi (destreg,
! gen_rtx_SUBREG (SImode, zeroreg, 0)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 2 && count != 0 && (count & 2))
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! if (align <= 2 || count == 0)
! {
! rtx label = ix86_expand_aligntest (destreg, 2);
! emit_insn (gen_strsethi (destreg,
! gen_rtx_SUBREG (HImode, zeroreg, 0)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! if (align > 1 && count != 0 && (count & 1))
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! if (align <= 1 || count == 0)
! {
! rtx label = ix86_expand_aligntest (destreg, 1);
! emit_insn (gen_strsetqi (destreg,
! gen_rtx_SUBREG (QImode, zeroreg, 0)));
! emit_label (label);
! LABEL_NUSES (label) = 1;
! }
! }
! return 1;
! }
! /* Expand strlen. */
! int
! ix86_expand_strlen (out, src, eoschar, align)
! rtx out, src, eoschar, align;
! {
! rtx addr, scratch1, scratch2, scratch3, scratch4;
!
! /* The generic case of the strlen expander is long.  Avoid its
! expanding unless TARGET_INLINE_ALL_STRINGOPS. */
!
! if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
! && !TARGET_INLINE_ALL_STRINGOPS
! && !optimize_size
! && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
! return 0;
!
! addr = force_reg (Pmode, XEXP (src, 0));
! scratch1 = gen_reg_rtx (Pmode);
!
! if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
! && !optimize_size)
{
! /* Well it seems that some optimizer does not combine a call like
! foo(strlen(bar), strlen(bar));
! when the move and the subtraction is done here. It does calculate
! the length just once when these instructions are done inside of
! output_strlen_unroll(). But I think since &bar[strlen(bar)] is
! often used and I use one fewer register for the lifetime of
! output_strlen_unroll() this is better. */
! emit_move_insn (out, addr);
! ix86_expand_strlensi_unroll_1 (out, align);
!
! /* strlensi_unroll_1 returns the address of the zero at the end of
! the string, like memchr(), so compute the length by subtracting
! the start address. */
! if (TARGET_64BIT)
! emit_insn (gen_subdi3 (out, out, addr));
else
! emit_insn (gen_subsi3 (out, out, addr));
}
else
{
! scratch2 = gen_reg_rtx (Pmode);
! scratch3 = gen_reg_rtx (Pmode);
! scratch4 = force_reg (Pmode, constm1_rtx);
! emit_move_insn (scratch3, addr);
! eoschar = force_reg (QImode, eoschar);
! emit_insn (gen_cld ());
! if (TARGET_64BIT)
{
! emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
! align, scratch4, scratch3));
! emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
! emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
}
else
! {
! emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
! align, scratch4, scratch3));
! emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
! emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
! }
}
+ return 1;
}
/* Expand the appropriate insns for doing strlen if not just doing
*************** ix86_split_lshrdi (operands, scratch)
*** 6992,7000 ****
This is just the body. It needs the initialisations mentioned above and
some address computing at the end. These things are done in i386.md. */
! void
! ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
! rtx out, align_rtx, scratch;
{
int align;
rtx tmp;
--- 8816,8824 ----
This is just the body. It needs the initialisations mentioned above and
some address computing at the end. These things are done in i386.md. */
! static void
! ix86_expand_strlensi_unroll_1 (out, align_rtx)
! rtx out, align_rtx;
{
int align;
rtx tmp;
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7004,7009 ****
--- 8828,8834 ----
rtx end_0_label = gen_label_rtx ();
rtx mem;
rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
align = 0;
if (GET_CODE (align_rtx) == CONST_INT)
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7014,7019 ****
--- 8839,8846 ----
/* Is there a known alignment and is it less than 4? */
if (align < 4)
{
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
/* Is there a known alignment and is it not 2? */
if (align != 2)
{
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7021,7046 ****
align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Leave just the 3 lower bits. */
! align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
! SImode, 1, 0, align_4_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
! SImode, 1, 0, align_2_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
! SImode, 1, 0, align_3_label);
}
else
{
/* Since the alignment is 2, we have to check 2 or 0 bytes;
check if is aligned to 4 - byte. */
! align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
! SImode, 1, 0, align_4_label);
}
mem = gen_rtx_MEM (QImode, out);
--- 8848,8873 ----
align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Leave just the 3 lower bits. */
! align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
! Pmode, 1, 0, align_4_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
! Pmode, 1, 0, align_2_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
! Pmode, 1, 0, align_3_label);
}
else
{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether the pointer is aligned to a 4-byte boundary.  */
! align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
! Pmode, 1, 0, align_4_label);
}
mem = gen_rtx_MEM (QImode, out);
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7052,7058 ****
QImode, 1, 0, end_0_label);
/* Increment the address. */
! emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
--- 8879,8888 ----
QImode, 1, 0, end_0_label);
/* Increment the address. */
! if (TARGET_64BIT)
! emit_insn (gen_adddi3 (out, out, const1_rtx));
! else
! emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7062,7068 ****
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
! emit_insn (gen_addsi3 (out, out, const1_rtx));
emit_label (align_3_label);
}
--- 8892,8901 ----
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
! if (TARGET_64BIT)
! emit_insn (gen_adddi3 (out, out, const1_rtx));
! else
! emit_insn (gen_addsi3 (out, out, const1_rtx));
emit_label (align_3_label);
}
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7070,7076 ****
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
! emit_insn (gen_addsi3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
--- 8903,8912 ----
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
! if (TARGET_64BIT)
! emit_insn (gen_adddi3 (out, out, const1_rtx));
! else
! emit_insn (gen_addsi3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7080,7086 ****
mem = gen_rtx_MEM (SImode, out);
emit_move_insn (scratch, mem);
! emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
--- 8916,8925 ----
mem = gen_rtx_MEM (SImode, out);
emit_move_insn (scratch, mem);
! if (TARGET_64BIT)
! emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
! else
! emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7095,7100 ****
--- 8934,8940 ----
if (TARGET_CMOVE)
{
rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
emit_move_insn (reg, tmpreg);
emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7107,7121 ****
reg,
tmpreg)));
/* Emit lea manually to avoid clobbering of flags. */
! emit_insn (gen_rtx_SET (SImode, reg,
! gen_rtx_PLUS (SImode, out, GEN_INT (2))));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, out,
! gen_rtx_IF_THEN_ELSE (SImode, tmp,
! reg,
! out)));
}
else
--- 8947,8961 ----
reg,
tmpreg)));
/* Emit lea manually to avoid clobbering of flags. */
! emit_insn (gen_rtx_SET (SImode, reg2,
! gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, out,
! gen_rtx_IF_THEN_ELSE (Pmode, tmp,
! reg2,
! out)));
}
else
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7134,7140 ****
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
! emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
emit_label (end_2_label);
--- 8974,8983 ----
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
! if (TARGET_64BIT)
! emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
! else
! emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
emit_label (end_2_label);
*************** ix86_expand_strlensi_unroll_1 (out, alig
*** 7143,7149 ****
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
! emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
emit_label (end_0_label);
}
--- 8986,8995 ----
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
! if (TARGET_64BIT)
! emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
! else
! emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
emit_label (end_0_label);
}
*** i386-protos.h Wed Mar 21 19:53:08 2001
--- /p1/new/x86-64/gcc/gcc/config/i386/i386-protos.h Wed Mar 21 10:28:12 2001
*************** extern int promotable_binary_operator PA
*** 77,85 ****
--- 78,89 ----
extern int memory_displacement_operand PARAMS ((rtx, enum machine_mode));
extern int cmpsi_operand PARAMS ((rtx, enum machine_mode));
extern int long_memory_operand PARAMS ((rtx, enum machine_mode));
extern int aligned_operand PARAMS ((rtx, enum machine_mode));
extern enum machine_mode ix86_cc_mode PARAMS ((enum rtx_code, rtx, rtx));
+ extern int ix86_expand_movstr PARAMS ((rtx, rtx, rtx, rtx));
+ extern int ix86_expand_clrstr PARAMS ((rtx, rtx, rtx));
+ extern int ix86_expand_strlen PARAMS ((rtx, rtx, rtx, rtx));
extern int legitimate_pic_address_disp_p PARAMS ((rtx));
extern int legitimate_address_p PARAMS ((enum machine_mode, rtx, int));
*************** extern int ix86_split_long_move PARAMS (
*** 119,125 ****
extern void ix86_split_ashldi PARAMS ((rtx *, rtx));
extern void ix86_split_ashrdi PARAMS ((rtx *, rtx));
extern void ix86_split_lshrdi PARAMS ((rtx *, rtx));
- extern void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx, rtx));
extern int ix86_address_cost PARAMS ((rtx));
extern rtx ix86_find_base_term PARAMS ((rtx));
--- 124,129 ----
More information about the Gcc-patches
mailing list