This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RFA: try original clobbers for combiner splitters
- From: Joern RENNECKE <joern dot rennecke at st dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 02 Mar 2005 21:38:14 +0000
- Subject: RFA: try original clobbers for combiner splitters
This patch was first posted here together with a number of other patches:
http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01937.html
http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01938.html
However, I didn't have time to separate and explain the patches then,
so let's start with this combine.c one.
Consider these patterns:
;; Expander for the named pattern divsi3 (SImode division).
;;
;; The RTL template below shows the generic library-call shape: operand 3
;; receives the address of __sdivsi3, operands 1 and 2 are copied into R4
;; and R5, and a parallel sets operand 0 to (div R4 R5) while clobbering
;; T, PR, R1, R2 and R3 and using operand 3.
;;
;; Every path through the preparation code ends in DONE, so the insns are
;; emitted by that code.  It picks one of five strategies:
;;   - TARGET_HARD_SH4 && TARGET_SH3E: load __sdivsi3_i4 into operand 3 and
;;     use divsi3_i4_single (if TARGET_FPU_SINGLE) or divsi3_i4.
;;   - TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV: allocate fresh pseudos and
;;     emit the divsi_inv_m0 .. divsi_inv_m4 sequence, then finish at once.
;;   - TARGET_SHMEDIA_FPU: force both inputs into registers and emit
;;     divsi3_i4_media, then finish at once.
;;   - TARGET_SH5: load __sdivsi3_i4 (if TARGET_FPU_ANY) or __sdivsi3 and
;;     use divsi3_i1_media, divsi3_i4_single or divsi3_i1.
;;   - otherwise: load __sdivsi3 and use divsi3_i1.
;; For the three library-call strategies, operands 1 and 2 are then moved
;; into hard registers 4 and 5, the chosen insn is emitted, and the first
;; and last insns are linked with REG_LIBCALL / REG_RETVAL notes.
(define_expand "divsi3"
[(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
(set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
(parallel [(set (match_operand:SI 0 "register_operand" "")
(div:SI (reg:SI R4_REG)
(reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
(clobber (reg:SI R2_REG))
(clobber (reg:SI R3_REG))
(use (match_dup 3))])]
""
"
{
rtx first, last;
operands[3] = gen_reg_rtx (Pmode);
/* Emit the move of the address to a pseudo outside of the libcall. */
if (TARGET_HARD_SH4 && TARGET_SH3E)
{
emit_move_insn (operands[3],
gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\"));
if (TARGET_FPU_SINGLE)
last = gen_divsi3_i4_single (operands[0], operands[3]);
else
last = gen_divsi3_i4 (operands[0], operands[3]);
}
else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV)
{
rtx dividend = operands[1];
rtx divisor = operands[2];
rtx flt_exp = gen_reg_rtx (SImode);
rtx scratch0 = gen_reg_rtx (SImode);
rtx scratch1a = gen_reg_rtx (SFmode);
rtx scratch1b = gen_reg_rtx (SImode);
rtx scratch1c = gen_reg_rtx (SFmode);
rtx shift = gen_reg_rtx (DImode);
rtx inv0 = gen_reg_rtx (SImode);
rtx scratch2a = gen_reg_rtx (DImode);
rtx scratch2b = gen_reg_rtx (SImode);
rtx scratch2c = gen_reg_rtx (DImode);
rtx inv1 = gen_reg_rtx (SImode);
rtx scratch3a = gen_reg_rtx (DImode);
rtx scratch3b = gen_reg_rtx (DImode);
rtx scratch3c = gen_reg_rtx (SImode);
rtx scratch3d = gen_reg_rtx (SImode);
rtx scratch3e = gen_reg_rtx (DFmode);
rtx scratch3f = gen_reg_rtx (DFmode);
rtx scratch3g = gen_reg_rtx (DFmode);
rtx result = gen_reg_rtx (SImode);
if (! arith_reg_or_0_operand (dividend, SImode))
dividend = force_reg (SImode, dividend);
if (! arith_reg_operand (divisor, SImode))
divisor = force_reg (SImode, divisor);
emit_insn (gen_divsi_inv_m0 (flt_exp, divisor, scratch0));
emit_insn (gen_divsi_inv_m1 (inv0, divisor, flt_exp,
scratch1a, scratch1b, scratch1c));
emit_insn (gen_addsidi3_media (shift, GEN_INT (-127), flt_exp));
emit_insn (gen_divsi_inv_m2 (inv1, divisor, inv0, shift,
scratch2a, scratch2b, scratch2c));
emit_insn (gen_divsi_inv_m3 (result, dividend, inv0, inv1, shift,
scratch3a, scratch3b, scratch3c, scratch3d,
scratch3e, scratch3f, scratch3g));
emit_insn (gen_divsi_inv_m4 (operands[0], result, divisor,
scratch3f, scratch3g, scratch3d));
DONE;
}
else if (TARGET_SHMEDIA_FPU)
{
operands[1] = force_reg (SImode, operands[1]);
operands[2] = force_reg (SImode, operands[2]);
emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2]));
DONE;
}
else if (TARGET_SH5)
{
emit_move_insn (operands[3],
gen_rtx_SYMBOL_REF (Pmode,
(TARGET_FPU_ANY
? \"__sdivsi3_i4\"
: \"__sdivsi3\")));
if (TARGET_SHMEDIA)
last = gen_divsi3_i1_media (operands[0], operands[3]);
else if (TARGET_FPU_ANY)
last = gen_divsi3_i4_single (operands[0], operands[3]);
else
last = gen_divsi3_i1 (operands[0], operands[3]);
}
else
{
emit_move_insn (operands[3], gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3\"));
last = gen_divsi3_i1 (operands[0], operands[3]);
}
first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
last = emit_insn (last);
/* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
invariant code motion can move it. */
REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
DONE;
}")
;; divsi_inv_m1: operand 0 is set to UNSPEC_DIV_INV_M1 of operands 1 and 2.
;; Operands 3 (SF), 4 (SI) and 5 (SF) are scratch registers, expressed as
;; clobbers.  The insn is enabled for TARGET_SHMEDIA_FPU, has the output
;; template # (no assembler text of its own) and is split once
;; no_new_pseudos is set.
;;
;; The split emits, in order: floatsisf2 of operand 1 into operand 3,
;; ashlsi3_media of operand 2 by 23 into operand 4, beq_divsf3_media
;; with operand 5 as destination and the SFmode view of operand 4,
;; operand 3 and operand 1 as inputs, and finally fix_truncsfsi2 of
;; operand 5 into operand 0.  The preprocessor-disabled branch holds an
;; alternative spelling with an explicit jump and label; it is not compiled.
(define_insn_and_split "divsi_inv_m1"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "register_operand" "r")]
UNSPEC_DIV_INV_M1))
(clobber (match_operand:SF 3 "register_operand" "=r"))
(clobber (match_operand:SI 4 "register_operand" "=r"))
(clobber (match_operand:SF 5 "register_operand" "=r"))]
"TARGET_SHMEDIA_FPU"
"#"
"&& no_new_pseudos"
[(pc)]
"
{
rtx op4sf = simplify_gen_subreg (SFmode, operands[4], SImode, 0);
rtx divz = gen_label_rtx ();
emit_insn (gen_floatsisf2 (operands[3], operands[1]));
emit_insn (gen_ashlsi3_media (operands[4], operands[2], GEN_INT (23)));
#if 0
emit_jump_insn (gen_beq_media (divz, operands[1], const0_rtx));
emit_insn (gen_divsf3 (operands[5], op4sf, operands[3]));
emit_label (divz);
#else
emit_insn (gen_beq_divsf3_media (operands[5], op4sf, operands[3],
operands[1]));
#endif
emit_insn (gen_fix_truncsfsi2 (operands[0], operands[5]));
DONE;
}")
;; divsi_inv_m2: operand 0 is set to UNSPEC_DIV_INV_M2 of operands 1, 2
;; (both SI) and 3 (DI).  Operands 4 (DI), 5 (SI) and 6 (DI) are scratch
;; registers, expressed as clobbers.  The insn is enabled for
;; TARGET_SHMEDIA_FPU, has the output template # and is split once
;; no_new_pseudos is set.
;;
;; The split works mostly in operand 4: a mulsidi3_media of operands 1
;; and 2, a left shift by 20, a right shift by operand 6 (which holds
;; operand 3 plus -29), a second mulsidi3_media of the SImode view of
;; operand 4 with operand 2, and a right shift by 32.  Operand 5 is
;; loaded with the constant 219, and operand 0 receives subsi3 of
;; operand 5 and the SImode view of operand 4.
(define_insn_and_split "divsi_inv_m2"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "register_operand" "r")
(match_operand:DI 3 "register_operand" "r")]
UNSPEC_DIV_INV_M2))
(clobber (match_operand:DI 4 "register_operand" "=r"))
(clobber (match_operand:SI 5 "register_operand" "=r"))
(clobber (match_operand:DI 6 "register_operand" "=r"))]
"TARGET_SHMEDIA_FPU"
"#"
"&& no_new_pseudos"
[(pc)]
"
{
rtx op4_si = simplify_gen_subreg (SImode, operands[4], DImode, 0);
emit_insn (gen_mulsidi3_media (operands[4], operands[1], operands[2]));
emit_insn (gen_movsi (operands[5], GEN_INT (219)));
emit_insn (gen_adddi3 (operands[6], operands[3], GEN_INT (-29)));
emit_insn (gen_ashldi3 (operands[4], operands[4], GEN_INT (20)));
emit_insn (gen_ashrdi3_media (operands[4], operands[4], operands[6]));
emit_insn (gen_mulsidi3_media (operands[4], op4_si, operands[2]));
emit_insn (gen_ashrdi3_media (operands[4], operands[4], GEN_INT (32)));
emit_insn (gen_subsi3 (operands[0], operands[5], op4_si));
DONE;
}")
;; divsi_inv_m3: operand 0 is set to UNSPEC_DIV_INV_M3 of operands 1, 2, 3
;; (SI) and 4 (DI).  Operands 5 to 11 are scratch registers expressed as
;; clobbers: two DI, two SI and three DF floating-point registers.  The
;; insn is enabled for TARGET_SHMEDIA_FPU, has the output template # and
;; is split once no_new_pseudos is set.
;;
;; The split emits mulsidi3_media of operands 1 and 3 into operand 6 and
;; of operands 1 and 2 into operand 5, an arithmetic right shift of
;; operand 1 by 31 into operand 7, a right shift of operand 6 by 17, the
;; sum of operands 6 and 5 into operand 5, a right shift of operand 5 by
;; operand 4, and finally subsi3 of the SImode view of operand 5 and
;; operand 7 into operand 0.
;; NOTE(review): the split code shown here does not reference operands 8
;; to 11; presumably those clobbers exist so that the combined pattern
;; carries the scratch registers a later splitter needs -- confirm.
(define_insn_and_split "divsi_inv_m3"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
(match_operand:SI 2 "register_operand" "r")
(match_operand:SI 3 "register_operand" "r")
(match_operand:DI 4 "register_operand" "r")]
UNSPEC_DIV_INV_M3))
(clobber (match_operand:DI 5 "register_operand" "=r"))
(clobber (match_operand:DI 6 "register_operand" "=r"))
(clobber (match_operand:SI 7 "register_operand" "=r"))
(clobber (match_operand:SI 8 "register_operand" "=r"))
(clobber (match_operand:DF 9 "fp_arith_reg_operand" "=f"))
(clobber (match_operand:DF 10 "fp_arith_reg_operand" "=f"))
(clobber (match_operand:DF 11 "fp_arith_reg_operand" "=f"))]
"TARGET_SHMEDIA_FPU"
"#"
"&& no_new_pseudos"
[(pc)]
"
{
rtx op5_si = simplify_gen_subreg (SImode, operands[5], DImode, 0);
emit_insn (gen_mulsidi3_media (operands[6], operands[1], operands[3]));
emit_insn (gen_mulsidi3_media (operands[5], operands[1], operands[2]));
emit_insn (gen_ashrsi3 (operands[7], operands[1], GEN_INT (31)));
emit_insn (gen_ashrdi3 (operands[6], operands[6], GEN_INT (17)));
emit_insn (gen_adddi3 (operands[5], operands[6], operands[5]));
emit_insn (gen_ashrdi3 (operands[5], operands[5], operands[4]));
emit_insn (gen_subsi3 (operands[0], op5_si, operands[7]));
DONE;
}")
;; If a matching group of divide-by-inverse instructions is in the same
;; basic block after gcse & loop optimizations, we want to transform them
;; to a straight division using floating point. We use combiner splitters
;; with one bridge pattern for this purpose.
;;
;; This splitter matches an UNSPEC_DIV_INV_M3 set whose second and third
;; inputs are still the nested UNSPEC_DIV_INV_M1 and UNSPEC_DIV_INV_M2
;; expressions built from the same operands 2, 3 and 4 (enforced by the
;; match_dup entries), together with seven clobbers (operands 5 to 11).
;; The clobbers are part of the pattern, so the splitter only matches
;; when they are all present.
;;
;; When TARGET_SHMEDIA_FPU, TARGET_DIVIDE_FP and no_new_pseudos all hold,
;; the group is replaced by two insns: divsi_fp_m0, which writes operand 9
;; from operands 1, 3, 4 and 7, and divsi_fp_m1, which writes operand 0
;; from operands 9, 2, 10, 11 and 8.  Operands 5 and 6 are matched but not
;; referenced by the emitted code.
(define_split
[(set (match_operand:SI 0 "register_operand" "")
(unspec:SI [(match_operand:SI 1 "register_operand" "")
(unspec:SI [(match_operand:SI 2 "register_operand" "")
(match_operand:SI 3 "register_operand" "")]
UNSPEC_DIV_INV_M1)
(unspec:SI [(match_dup 2)
(unspec:SI [(match_dup 2) (match_dup 3)]
UNSPEC_DIV_INV_M1)
(match_operand:DI 4 "register_operand" "")]
UNSPEC_DIV_INV_M2)
(match_dup 4)]
UNSPEC_DIV_INV_M3))
(clobber (match_operand:DI 5 "" ""))
(clobber (match_operand:DI 6 "" ""))
(clobber (match_operand:SI 7 "fp_arith_reg_operand" ""))
(clobber (match_operand:SI 8 "fp_arith_reg_operand" ""))
(clobber (match_operand:DF 9 "fp_arith_reg_operand" ""))
(clobber (match_operand:DF 10 "fp_arith_reg_operand" ""))
(clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))]
"TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP && no_new_pseudos"
[(pc)]
"
{
emit_insn (gen_divsi_fp_m0 (operands[9], operands[1],
operands[3], operands[4],
operands[7]));
emit_insn (gen_divsi_fp_m1 (operands[0], operands[9], operands[2],
operands[10], operands[11], operands[8]));
DONE;
}")
Now we have a divsi_inv_m1, a divsi_inv_m2 and a divsi_inv_m3 pattern.
The divsi_inv_m1 pattern feeds the divsi_inv_m2 pattern, and
both feed the divsi_inv_m3 pattern. The combined pattern has lots of
clobbers, a set that exactly matches the ones required for the
define_split. However, try_combine first tries to recognize the
combined pattern as an entire instruction, and when the direct
recognition fails in recog_for_combine, it strips away all the clobbers
and retries without them. This doesn't work either, so
recog_for_combine indicates failure. Now try_combine tries to
split the combined pattern. However, the clobbers are still gone,
so this fails too. The attached patch saves the clobbers before calling
recog_for_combine, so that we can retry the split with them.
2003-08-04 J"orn Rennecke <joern.rennecke@superh.com>
* combine.c (try_combine): If splitting fails, re-try with
original combined pattern, i.e. before clobber stripping.
Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.400.4.11
diff -d -u -p -r1.400.4.11 combine.c
--- combine.c 12 Oct 2004 23:35:29 -0000 1.400.4.11
+++ combine.c 2 Mar 2005 20:46:33 -0000
@@ -1453,6 +1453,7 @@ try_combine (rtx i3, rtx i2, rtx i1, int
{
/* New patterns for I3 and I2, respectively. */
rtx newpat, newi2pat = 0;
+ rtvec newpat_vec_with_clobbers = 0;
int substed_i2 = 0, substed_i1 = 0;
/* Indicates need to preserve SET in I1 or I2 in I3 if it is not dead. */
int added_sets_1, added_sets_2;
@@ -2015,6 +2016,18 @@ try_combine (rtx i3, rtx i2, rtx i1, int
/* Note which hard regs this insn has as inputs. */
mark_used_regs_combine (newpat);
+ /* If recog_for_combine fails, it strips existing clobbers. If we'll
+ consider splitting this pattern, we might need these clobbers. */
+ if (i1 && GET_CODE (newpat) == PARALLEL
+ && GET_CODE (XVECEXP (newpat, 0, XVECLEN (newpat, 0) - 1)) == CLOBBER)
+ {
+ int len = XVECLEN (newpat, 0);
+
+ newpat_vec_with_clobbers = rtvec_alloc (len);
+ for (i = 0; i < len; i++)
+ RTVEC_ELT (newpat_vec_with_clobbers, i) = XVECEXP (newpat, 0, i);
+ }
+
/* Is the result of combination a valid instruction? */
insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
@@ -2132,6 +2145,13 @@ try_combine (rtx i3, rtx i2, rtx i1, int
}
}
+ /* If recog_for_combine has discarded clobbers, try to use them
+ again for the split. */
+ if (m_split == 0 && newpat_vec_with_clobbers)
+ m_split
+ = split_insns (gen_rtx_PARALLEL (VOIDmode,
+ newpat_vec_with_clobbers), i3);
+
if (m_split && NEXT_INSN (m_split) == NULL_RTX)
{
m_split = PATTERN (m_split);