This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFA: try original clobbers for combiner splitters



This patch was first posted here together with a number of other patches:
http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01937.html
http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01938.html

However, I didn't have time to separate and explain the patches then,
so let's start with this combine.c one.
Consider these patterns:

;; Expander for SImode signed division (div:SI).  The preparation code
;; below selects a strategy from the target flags:
;;   - library-routine variants: the routine address (__sdivsi3 or
;;     __sdivsi3_i4) is loaded into a fresh Pmode pseudo (operand 3), the
;;     two inputs are moved into hard registers 4 and 5, and one of the
;;     divsi3_i1 / divsi3_i4 / divsi3_i4_single / divsi3_i1_media insns
;;     is emitted;
;;   - TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV: an inline sequence built
;;     from divsi_inv_m0 .. divsi_inv_m4;
;;   - TARGET_SHMEDIA_FPU otherwise: a single divsi3_i4_media insn.
;; Every path ends in DONE, so the RTL template above the C code is never
;; emitted as written.
(define_expand "divsi3"
  [(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
   (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
   (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
   (parallel [(set (match_operand:SI 0 "register_operand" "")
                   (div:SI (reg:SI R4_REG)
                           (reg:SI R5_REG)))
              (clobber (reg:SI T_REG))
              (clobber (reg:SI PR_REG))
              (clobber (reg:SI R1_REG))
              (clobber (reg:SI R2_REG))
              (clobber (reg:SI R3_REG))
              (use (match_dup 3))])]
  ""
  "
{
  /* FIRST and LAST delimit the emitted sequence for the libcall notes
     attached at the end; LAST first holds the not-yet-emitted division
     pattern chosen by the branch taken.  */
  rtx first, last;

  operands[3] = gen_reg_rtx (Pmode);
  /* Emit the move of the address to a pseudo outside of the libcall.  */
  if (TARGET_HARD_SH4 && TARGET_SH3E)
    {
      emit_move_insn (operands[3],
                      gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\"));
      if (TARGET_FPU_SINGLE)
        last = gen_divsi3_i4_single (operands[0], operands[3]);
      else
        last = gen_divsi3_i4 (operands[0], operands[3]);
    }
  else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV)
    {
      /* Inline divide-by-inverse sequence.  All intermediate values and
         scratch registers are fresh pseudos allocated here and passed
         explicitly to the divsi_inv_m* patterns.  */
      rtx dividend = operands[1];
      rtx divisor = operands[2];
      rtx flt_exp = gen_reg_rtx (SImode);
      rtx scratch0 = gen_reg_rtx (SImode);
      rtx scratch1a = gen_reg_rtx (SFmode);
      rtx scratch1b = gen_reg_rtx (SImode);
      rtx scratch1c = gen_reg_rtx (SFmode);
      rtx shift = gen_reg_rtx (DImode);
      rtx inv0 = gen_reg_rtx (SImode);
      rtx scratch2a = gen_reg_rtx (DImode);
      rtx scratch2b = gen_reg_rtx (SImode);
      rtx scratch2c = gen_reg_rtx (DImode);
      rtx inv1 = gen_reg_rtx (SImode);
      rtx scratch3a = gen_reg_rtx (DImode);
      rtx scratch3b = gen_reg_rtx (DImode);
      rtx scratch3c = gen_reg_rtx (SImode);
      rtx scratch3d = gen_reg_rtx (SImode);
      rtx scratch3e = gen_reg_rtx (DFmode);
      rtx scratch3f = gen_reg_rtx (DFmode);
      rtx scratch3g = gen_reg_rtx (DFmode);
      rtx result = gen_reg_rtx (SImode);

      /* Copy the dividend / divisor into a register when it fails the
         respective predicate check.  */
      if (! arith_reg_or_0_operand (dividend, SImode))
        dividend = force_reg (SImode, dividend);
      if (! arith_reg_operand (divisor, SImode))
        divisor = force_reg (SImode, divisor);
      /* Emit divsi_inv_m0 .. divsi_inv_m4 in order; SHIFT is produced in
         between by an add of the constant -127 and FLT_EXP.  */
      emit_insn (gen_divsi_inv_m0 (flt_exp, divisor, scratch0));
      emit_insn (gen_divsi_inv_m1 (inv0, divisor, flt_exp,
                                   scratch1a, scratch1b, scratch1c));
      emit_insn (gen_addsidi3_media (shift, GEN_INT (-127), flt_exp));
      emit_insn (gen_divsi_inv_m2 (inv1, divisor, inv0, shift,
                                   scratch2a, scratch2b, scratch2c));
      emit_insn (gen_divsi_inv_m3 (result, dividend, inv0, inv1, shift,
                                   scratch3a, scratch3b, scratch3c, scratch3d,
                                   scratch3e, scratch3f, scratch3g));
      /* The m4 step reuses three of the scratch registers that were
         passed to m3.  */
      emit_insn (gen_divsi_inv_m4 (operands[0], result, divisor,
                                   scratch3f, scratch3g, scratch3d));
      DONE;
    }
  else if (TARGET_SHMEDIA_FPU)
    {
      /* Single-insn variant; both inputs must be in registers.  */
      operands[1] = force_reg (SImode, operands[1]);
      operands[2] = force_reg (SImode, operands[2]);
      emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2]));
      DONE;
    }
  else if (TARGET_SH5)
    {
      /* The routine name depends on TARGET_FPU_ANY; the insn pattern
         additionally depends on TARGET_SHMEDIA.  */
      emit_move_insn (operands[3],
                      gen_rtx_SYMBOL_REF (Pmode,
                                          (TARGET_FPU_ANY
                                           ? \"__sdivsi3_i4\"
                                           : \"__sdivsi3\")));

      if (TARGET_SHMEDIA)
        last = gen_divsi3_i1_media (operands[0], operands[3]);
      else if (TARGET_FPU_ANY)
        last = gen_divsi3_i4_single (operands[0], operands[3]);
      else
        last = gen_divsi3_i1 (operands[0], operands[3]);
    }
  else
    {
      emit_move_insn (operands[3], gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3\"));
      last = gen_divsi3_i1 (operands[0], operands[3]);
    }
  /* Common tail for the library-routine variants (the inline variants
     have already left via DONE): load the two inputs into hard registers
     4 and 5, then emit the division pattern selected above.  */
  first = emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
  last = emit_insn (last);
  /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
     invariant code motion can move it.  */
  REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
  REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
  DONE;
}")

;; Insn-and-split for the UNSPEC_DIV_INV_M1 step, enabled for
;; TARGET_SHMEDIA_FPU.  The output template is # (the insn must be split)
;; and the split condition additionally requires no_new_pseudos, so the
;; three scratch registers the split code needs are carried on the insn
;; as clobber operands 3-5.
(define_insn_and_split "divsi_inv_m1"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
                    (match_operand:SI 2 "register_operand" "r")]
         UNSPEC_DIV_INV_M1))
   (clobber (match_operand:SF 3 "register_operand" "=r"))
   (clobber (match_operand:SI 4 "register_operand" "=r"))
   (clobber (match_operand:SF 5 "register_operand" "=r"))]
  "TARGET_SHMEDIA_FPU"
  "#"
  "&& no_new_pseudos"
  [(pc)]
  "
{
  /* SFmode view of the SImode scratch (operand 4).  */
  rtx op4sf = simplify_gen_subreg (SFmode, operands[4], SImode, 0);
  /* NOTE(review): DIVZ is only referenced by the disabled #if 0 variant
     below; the active #else variant does not use it.  */
  rtx divz = gen_label_rtx ();

  /* Convert operand 1 to SFmode in operand 3, and shift operand 2 left
     by 23 bits into operand 4.  */
  emit_insn (gen_floatsisf2 (operands[3], operands[1]));
  emit_insn (gen_ashlsi3_media (operands[4], operands[2], GEN_INT (23)));
#if 0
  emit_jump_insn (gen_beq_media (divz, operands[1], const0_rtx));
  emit_insn (gen_divsf3 (operands[5], op4sf, operands[3]));
  emit_label (divz);
#else
  /* Single pattern used in place of the explicit branch-around-divide
     sequence kept above for reference.  */
  emit_insn (gen_beq_divsf3_media (operands[5], op4sf, operands[3],
                                   operands[1]));
#endif
  /* Convert the SFmode value in operand 5 back to SImode in operand 0.  */
  emit_insn (gen_fix_truncsfsi2 (operands[0], operands[5]));
  DONE;
}")

;; Insn-and-split for the UNSPEC_DIV_INV_M2 step, enabled for
;; TARGET_SHMEDIA_FPU.  Like divsi_inv_m1 it has no direct output (#) and
;; is split only when no_new_pseudos is set, using clobber operands 4-6
;; as scratch registers.
(define_insn_and_split "divsi_inv_m2"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
                    (match_operand:SI 2 "register_operand" "r")
                    (match_operand:DI 3 "register_operand" "r")]
         UNSPEC_DIV_INV_M2))
   (clobber (match_operand:DI 4 "register_operand" "=r"))
   (clobber (match_operand:SI 5 "register_operand" "=r"))
   (clobber (match_operand:DI 6 "register_operand" "=r"))]
  "TARGET_SHMEDIA_FPU"
  "#"
  "&& no_new_pseudos"
  [(pc)]
  "
{
  /* SImode view of the DImode scratch (operand 4).  */
  rtx op4_si = simplify_gen_subreg (SImode, operands[4], DImode, 0);

  /* Operand 4 is overwritten repeatedly below, so the order of these
     insns matters.  Operand 5 receives the constant 219 and operand 6
     receives operand 3 plus the constant -29.  */
  emit_insn (gen_mulsidi3_media (operands[4], operands[1], operands[2]));
  emit_insn (gen_movsi (operands[5], GEN_INT (219)));
  emit_insn (gen_adddi3 (operands[6], operands[3], GEN_INT (-29)));
  emit_insn (gen_ashldi3 (operands[4], operands[4], GEN_INT (20)));
  emit_insn (gen_ashrdi3_media (operands[4], operands[4], operands[6]));
  emit_insn (gen_mulsidi3_media (operands[4], op4_si, operands[2]));
  emit_insn (gen_ashrdi3_media (operands[4], operands[4], GEN_INT (32)));
  /* The result combines operand 5 with the SImode view of operand 4 via
     subsi3.  */
  emit_insn (gen_subsi3 (operands[0], operands[5], op4_si));
  DONE;
}")

;; Insn-and-split for the UNSPEC_DIV_INV_M3 step, enabled for
;; TARGET_SHMEDIA_FPU.  No direct output (#); split only when
;; no_new_pseudos is set.  The split code uses clobber operands 5-7 as
;; scratch registers.  Clobber operands 8-11 are declared but not
;; referenced by the split code.
;; NOTE(review): operands 8-11 are presumably carried for the combiner
;; define_split that matches this pattern's clobbers -- confirm.
(define_insn_and_split "divsi_inv_m3"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
                    (match_operand:SI 2 "register_operand" "r")
                    (match_operand:SI 3 "register_operand" "r")
                    (match_operand:DI 4 "register_operand" "r")]
         UNSPEC_DIV_INV_M3))
   (clobber (match_operand:DI 5 "register_operand" "=r"))
   (clobber (match_operand:DI 6 "register_operand" "=r"))
   (clobber (match_operand:SI 7 "register_operand" "=r"))
   (clobber (match_operand:SI 8 "register_operand" "=r"))
   (clobber (match_operand:DF 9 "fp_arith_reg_operand" "=f"))
   (clobber (match_operand:DF 10 "fp_arith_reg_operand" "=f"))
   (clobber (match_operand:DF 11 "fp_arith_reg_operand" "=f"))]
  "TARGET_SHMEDIA_FPU"
  "#"
  "&& no_new_pseudos"
  [(pc)]
  "
{
  /* SImode view of the DImode scratch (operand 5).  */
  rtx op5_si = simplify_gen_subreg (SImode, operands[5], DImode, 0);

  /* Two products of operand 1 (with operands 3 and 2) go to operands 6
     and 5; operand 7 receives operand 1 arithmetically shifted right by
     31 bits.  */
  emit_insn (gen_mulsidi3_media (operands[6], operands[1], operands[3]));
  emit_insn (gen_mulsidi3_media (operands[5], operands[1], operands[2]));
  emit_insn (gen_ashrsi3 (operands[7], operands[1], GEN_INT (31)));
  /* Shift operand 6 right by 17, add it into operand 5, then shift
     operand 5 right by the amount in operand 4.  */
  emit_insn (gen_ashrdi3 (operands[6], operands[6], GEN_INT (17)));
  emit_insn (gen_adddi3 (operands[5], operands[6], operands[5]));
  emit_insn (gen_ashrdi3 (operands[5], operands[5], operands[4]));
  /* The result combines the SImode view of operand 5 with operand 7 via
     subsi3.  */
  emit_insn (gen_subsi3 (operands[0], op5_si, operands[7]));
  DONE;
}")

;; If a matching group of divide-by-inverse instructions is in the same
;; basic block after gcse & loop optimizations, we want to transform them
;; to a straight division using floating point.  We use combiner splitters
;; with one bridge pattern for this purpose.
;;
;; The template matches an UNSPEC_DIV_INV_M3 whose second and third inputs
;; are the UNSPEC_DIV_INV_M1 and UNSPEC_DIV_INV_M2 expressions themselves
;; (i.e. the three steps substituted into one another), together with
;; seven clobbers.  The match_dup entries require the nested expressions
;; to share operands 2, 3 and 4.
;;
;; Clobber operands 5 and 6 are matched with an empty predicate and are
;; not used by the replacement code; operands 7-11 supply its scratch
;; registers.
;; NOTE(review): operands 7 and 8 are matched here with
;; fp_arith_reg_operand in SImode, whereas divsi_inv_m3 declares its
;; operands 7 and 8 with register_operand -- confirm this is intended.
(define_split
  [(set (match_operand:SI 0 "register_operand" "")
        (unspec:SI [(match_operand:SI 1 "register_operand" "")
                    (unspec:SI [(match_operand:SI 2 "register_operand" "")
                                (match_operand:SI 3 "register_operand" "")]
                     UNSPEC_DIV_INV_M1)
                    (unspec:SI [(match_dup 2)
                                (unspec:SI [(match_dup 2) (match_dup 3)]
                                 UNSPEC_DIV_INV_M1)
                                (match_operand:DI 4 "register_operand" "")]
                     UNSPEC_DIV_INV_M2)
                    (match_dup 4)]
         UNSPEC_DIV_INV_M3))
   (clobber (match_operand:DI 5 "" ""))
   (clobber (match_operand:DI 6 "" ""))
   (clobber (match_operand:SI 7 "fp_arith_reg_operand" ""))
   (clobber (match_operand:SI 8 "fp_arith_reg_operand" ""))
   (clobber (match_operand:DF 9 "fp_arith_reg_operand" ""))
   (clobber (match_operand:DF 10 "fp_arith_reg_operand" ""))
   (clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))]
  "TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP && no_new_pseudos"
  [(pc)]
  "
{
  /* Replace the whole matched group by two insns: divsi_fp_m0 writes
     operand 9, which divsi_fp_m1 then consumes to produce the final
     result in operand 0.  */
  emit_insn (gen_divsi_fp_m0 (operands[9], operands[1],
                              operands[3], operands[4],
                              operands[7]));
  emit_insn (gen_divsi_fp_m1 (operands[0], operands[9], operands[2],
                              operands[10], operands[11], operands[8]));
  DONE;
}")

Now we have a divsi_inv_m1, a divsi_inv_m2 and a divsi_inv_m3 pattern.
The divsi_inv_m1 pattern feeds the divsi_inv_m2 pattern, and
both feed the divsi_inv_m3 pattern.  The combined pattern has lots of
clobbers, a set that exactly matches the ones required for the
define_split.  However, try_combine first tries to recognize the
combined pattern as an entire instruction, and as the direct
recognition fails in recog_for_combine, it strips away all the clobbers
and re-tries without the clobbers.  This doesn't work either, so
recog_for_combine indicates failure.  Now try_combine tries to
split the combined pattern.  However, the clobbers are still gone,
so this fails too.  The attached patch saves the clobbers before calling
recog_for_combine, so that if the split fails we can retry it on the
original combined pattern with its clobbers intact.

2003-08-04  J"orn Rennecke <joern.rennecke@superh.com>

	* combine.c (try_combine): If splitting fails, re-try with
	original combined pattern, i.e. before clobber stripping.

Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.400.4.11
diff -d -u -p -r1.400.4.11 combine.c
--- combine.c	12 Oct 2004 23:35:29 -0000	1.400.4.11
+++ combine.c	2 Mar 2005 20:46:33 -0000
@@ -1453,6 +1453,7 @@ try_combine (rtx i3, rtx i2, rtx i1, int
 {
   /* New patterns for I3 and I2, respectively.  */
   rtx newpat, newi2pat = 0;
+  rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0;
   /* Indicates need to preserve SET in I1 or I2 in I3 if it is not dead.  */
   int added_sets_1, added_sets_2;
@@ -2015,6 +2016,18 @@ try_combine (rtx i3, rtx i2, rtx i1, int
   /* Note which hard regs this insn has as inputs.  */
   mark_used_regs_combine (newpat);
 
+  /* If recog_for_combine fails, it strips existing clobbers.  If we'll
+     consider splitting this pattern, we might need these clobbers.  */
+  if (i1 && GET_CODE (newpat) == PARALLEL
+      && GET_CODE (XVECEXP (newpat, 0, XVECLEN (newpat, 0) - 1)) == CLOBBER)
+    {
+      int len = XVECLEN (newpat, 0);
+
+      newpat_vec_with_clobbers = rtvec_alloc (len);
+      for (i = 0; i < len; i++)
+	RTVEC_ELT (newpat_vec_with_clobbers, i) = XVECEXP (newpat, 0, i);
+    }
+
   /* Is the result of combination a valid instruction?  */
   insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
 
@@ -2132,6 +2145,13 @@ try_combine (rtx i3, rtx i2, rtx i1, int
 	    }
 	}
 
+      /* If recog_for_combine has discarded clobbers, try to use them
+	 again for the split.  */
+      if (m_split == 0 && newpat_vec_with_clobbers)
+	m_split
+	  = split_insns (gen_rtx_PARALLEL (VOIDmode,
+					   newpat_vec_with_clobbers), i3);
+
       if (m_split && NEXT_INSN (m_split) == NULL_RTX)
 	{
 	  m_split = PATTERN (m_split);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]