emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
ix86_expand_ashlsi3_const (low[0], count);
}
+ return;
}
- else
+
+ split_di (operands, 1, low, high);
+
+ if (operands[1] == const1_rtx)
{
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
+ /* Assuming we've chosen QImode capable registers, then 1LL << N
+ can be done with two 32-bit shifts, no branches, no cmoves. */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
- split_di (operands, 1, low, high);
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
- emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
- emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
{
- if (! no_new_pseudos)
- scratch = force_reg (SImode, const0_rtx);
+ rtx x;
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_size)
+ x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
else
- emit_move_insn (scratch, const0_rtx);
+ x = gen_lowpart (SImode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
- emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
- scratch));
+ emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
+ emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
+ emit_move_insn (low[0], high[0]);
+ emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
}
+
+ emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+ emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+ /* For -1LL << N, we can avoid the shld instruction, because we
+ know that we're shifting 0...31 ones into a -1. */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_size)
+ emit_move_insn (high[0], low[0]);
else
- emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
+ emit_move_insn (high[0], constm1_rtx);
}
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_di (operands, 1, low, high);
+ emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
+ }
+
+ emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
void
else if (count >= 32)
{
emit_move_insn (low[0], high[1]);
-
- if (! reload_completed)
- emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
- else
- {
- emit_move_insn (high[0], low[0]);
- emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
- }
-
+ emit_move_insn (high[0], low[0]);
+ emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
if (count > 32)
emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
}
emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ if (TARGET_CMOVE && scratch)
{
- if (! no_new_pseudos)
- scratch = gen_reg_rtx (SImode);
emit_move_insn (scratch, high[0]);
emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
if (count >= 32)
{
emit_move_insn (low[0], high[1]);
- emit_move_insn (high[0], const0_rtx);
+ ix86_expand_clear (high[0]);
if (count > 32)
emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
/* Heh. By reversing the arguments, we can reuse this pattern. */
- if (TARGET_CMOVE && (! no_new_pseudos || scratch))
+ if (TARGET_CMOVE && scratch)
{
- if (! no_new_pseudos)
- scratch = force_reg (SImode, const0_rtx);
- else
- emit_move_insn (scratch, const0_rtx);
-
+ ix86_expand_clear (scratch);
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
scratch));
}
(define_split
[(set (match_operand:DI 0 "push_operand" "")
(match_operand:DI 1 "immediate_operand" ""))]
- "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2))
+ "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
&& !symbolic_operand (operands[1], DImode)
&& !x86_64_immediate_operand (operands[1], DImode)"
[(set (match_dup 0) (match_dup 1))
(define_split
[(set (match_operand:DI 0 "memory_operand" "")
(match_operand:DI 1 "immediate_operand" ""))]
- "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2))
+ "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
&& !symbolic_operand (operands[1], DImode)
&& !x86_64_immediate_operand (operands[1], DImode)"
[(set (match_dup 2) (match_dup 3))
"")
(define_insn "*testqi_1"
- [(set (reg 17)
+ [(set (reg FLAGS_REG)
(compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
(match_operand:QI 1 "general_operand" "n,n,qn,n"))
(const_int 0)))]
;; than 31.
(define_expand "ashldi3"
- [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
- (ashift:DI (match_operand:DI 1 "shiftdi_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (reg:CC FLAGS_REG))])]
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
""
-{
- if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
- {
- emit_insn (gen_ashldi3_1 (operands[0], operands[1], operands[2]));
- DONE;
- }
- ix86_expand_binary_operator (ASHIFT, DImode, operands);
- DONE;
-})
+ "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
(define_insn "*ashldi3_1_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
(const_string "ishift")))
(set_attr "mode" "DI")])
-(define_insn "ashldi3_1"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ashift:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "Jc")))
- (clobber (match_scratch:SI 3 "=&r"))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE"
- "#"
- [(set_attr "type" "multi")])
-
-(define_insn "*ashldi3_2"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ashift:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "Jc")))
+(define_insn "*ashldi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=&r,r")
+ (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT"
"#"
[(set_attr "type" "multi")])
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (ashift:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
[(const_int 0)]
"ix86_split_ashldi (operands, operands[3]); DONE;")
(define_split
[(set (match_operand:DI 0 "register_operand" "")
- (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
(match_operand:QI 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && reload_completed"
+ "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
[(const_int 0)]
"ix86_split_ashldi (operands, NULL_RTX); DONE;")
JUMP_LABEL (tmp) = label;
emit_move_insn (operands[0], operands[1]);
- emit_move_insn (operands[1], const0_rtx);
+ ix86_expand_clear (operands[1]);
emit_label (label);
LABEL_NUSES (label) = 1;
;; See comment above `ashldi3' about how this works.
(define_expand "ashrdi3"
- [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
- (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (reg:CC FLAGS_REG))])]
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
""
-{
- if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
- {
- emit_insn (gen_ashrdi3_1 (operands[0], operands[1], operands[2]));
- DONE;
- }
- ix86_expand_binary_operator (ASHIFTRT, DImode, operands);
- DONE;
-})
+ "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
-(define_insn "ashrdi3_63_rex64"
+(define_insn "*ashrdi3_63_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
(ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
(match_operand:DI 2 "const_int_operand" "i,i")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && INTVAL (operands[2]) == 63 && (TARGET_USE_CLTD || optimize_size)
+ "TARGET_64BIT && INTVAL (operands[2]) == 63
+ && (TARGET_USE_CLTD || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
"@
{cqto|cqo}
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
-
-(define_insn "ashrdi3_1"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "Jc")))
- (clobber (match_scratch:SI 3 "=&r"))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE"
- "#"
- [(set_attr "type" "multi")])
-
-(define_insn "*ashrdi3_2"
+(define_insn "*ashrdi3_1"
[(set (match_operand:DI 0 "register_operand" "=r")
(ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
(match_operand:QI 2 "nonmemory_operand" "Jc")))
"#"
[(set_attr "type" "multi")])
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
[(const_int 0)]
"ix86_split_ashrdi (operands, operands[3]); DONE;")
(ashiftrt:DI (match_operand:DI 1 "register_operand" "")
(match_operand:QI 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && reload_completed"
+ "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
[(const_int 0)]
"ix86_split_ashrdi (operands, NULL_RTX); DONE;")
;; See comment above `ashldi3' about how this works.
(define_expand "lshrdi3"
- [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "")
- (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (reg:CC FLAGS_REG))])]
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
""
-{
- if (!TARGET_64BIT && TARGET_CMOVE && ! immediate_operand (operands[2], QImode))
- {
- emit_insn (gen_lshrdi3_1 (operands[0], operands[1], operands[2]));
- DONE;
- }
- ix86_expand_binary_operator (LSHIFTRT, DImode, operands);
- DONE;
-})
+ "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
(define_insn "*lshrdi3_1_one_bit_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
[(set_attr "type" "ishift")
(set_attr "mode" "DI")])
-(define_insn "lshrdi3_1"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "Jc")))
- (clobber (match_scratch:SI 3 "=&r"))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE"
- "#"
- [(set_attr "type" "multi")])
-
-(define_insn "*lshrdi3_2"
+(define_insn "*lshrdi3_1"
[(set (match_operand:DI 0 "register_operand" "=r")
(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
(match_operand:QI 2 "nonmemory_operand" "Jc")))
"#"
[(set_attr "type" "multi")])
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:QI 2 "nonmemory_operand" "")))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_CMOVE && reload_completed"
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
[(const_int 0)]
"ix86_split_lshrdi (operands, operands[3]); DONE;")
(lshiftrt:DI (match_operand:DI 1 "register_operand" "")
(match_operand:QI 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && reload_completed"
+ "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
[(const_int 0)]
"ix86_split_lshrdi (operands, NULL_RTX); DONE;")
[(set_attr "type" "setcc")
(set_attr "mode" "QI")])
-(define_insn "setcc_2"
+(define_insn "*setcc_2"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
(match_operator:QI 1 "ix86_comparison_operator"
[(reg 17) (const_int 0)]))]