This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC PATCH, i386] Unify TARGET_SSE_MATH for trunc* patterns


Richard Henderson wrote:

Missed truncdfsf2_1? You're not ordering these as we discussed,
and as you had been doing for the other patches, which makes me doubt we'll select the proper pattern.


I'm a bit confused about the choices here in the trunc patterns
in general.  We have choices:

(1) -ffast-math.
(2) allow 80387 reg-reg, but require a scratch memory,
(3) disallow 80387 reg-reg, no scratch memory.

However, the patterns are not consistent in their selection of
2 or 3, and don't seem to allow for all alternatives allowed by
-mfpmath.


Perhaps merging TARGET_80387 and TARGET_MIX_SSE_I387 was not a good idea... I have split them into separate patterns and ordered them as they were ordered in the previous cleanups. Also, the names are a lot more descriptive, and the split/unsplit patterns are grouped together in a logical way. A couple of comments were added as well.

Unfortunately, with the current state of affairs, we have to provide both unsplit patterns (with a memory clobber) and split patterns (without a memory clobber) for the *_387 and *_mixed patterns. The split patterns without the memory clobber have their names suffixed with _1.

Each pattern now shadows the patterns that follow it with respect to TARGET_SSE2, TARGET_MIX_SSE_I387 and, additionally, TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS.

BTW: I have changed "truncdfsf2" expander to:

(define_expand "truncdfsf2"
 [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
          (float_truncate:SF
           (match_operand:DF 1 "nonimmediate_operand" "")))
         (clobber (match_dup 2))])]
 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 "
  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
    operands[1] = force_reg (DFmode, operands[1]);
...

As an aside, I would support removing TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS,
since it's currently always zero.  Honza, is this the remnants of
an experiment to see what's faster, or is this supposed to be
set for Athlon, or what?

I suggest leaving TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS as it is for the moment. Your suggested patch is IMHO a lot more intrusive than my rename/move/enable patch.

I have attached relevant part of i386.md to this message. The change was bootstrapped on pentium4-pc-linux-gnu, regtest is in progress.

Uros.
;; %%% This seems bad bad news.
;; This cannot output into an f-reg because there is no way to be sure
;; of truncating in that case.  Otherwise this is just like a simple move
;; insn.  So we pretend we can output to a reg in order to get better
;; register preferencing, but we really use a stack slot.

;; Conversion from DFmode to SFmode.

;; Expander for DFmode -> SFmode truncation.
;;
;; Three strategies are selected in the preparation code:
;;   * SSE math on SSE2: emit truncdfsf2_sse and stop.
;;   * x87 with -funsafe-math-optimizations: emit truncdfsf2_i387_noop
;;     (a plain x87 register move) and stop.
;;   * otherwise: fall through to the parallel below, with operand 2 set to
;;     the SFmode location returned by assign_386_stack_local, which the
;;     matching insns use as a memory scratch.
;;
;; Both truncdfsf2_sse and truncdfsf2_i387_noop require a register
;; destination, while this expander accepts any nonimmediate operand, so
;; both early-exit paths compute into a register and copy the result to
;; operand 0 when operand 0 is not itself a register.
(define_expand "truncdfsf2"
  [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
		   (float_truncate:SF
		    (match_operand:DF 1 "nonimmediate_operand" "")))
	      (clobber (match_dup 2))])]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
  "
   /* None of the insns below accept a memory-to-memory truncation.  */
   if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
     operands[1] = force_reg (DFmode, operands[1]);

   if (TARGET_SSE2 && TARGET_SSE_MATH)
     {
	/* Operand 0 of truncdfsf2_sse is a register_operand; go through a
	   fresh pseudo when the requested destination is not a register.  */
	rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode);
	emit_insn (gen_truncdfsf2_sse (reg, operands[1]));
	if (reg != operands[0])
	  emit_move_insn (operands[0], reg);
	DONE;
     }
   else if (flag_unsafe_math_optimizations)
     {
	/* Same register-destination requirement for the x87 no-op form.  */
	rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode);
	emit_insn (gen_truncdfsf2_i387_noop (reg, operands[1]));
	if (reg != operands[0])
	  emit_move_insn (operands[0], reg);
	DONE;
     }
   else
     operands[2] = assign_386_stack_local (SFmode, 0);
")

;; DFmode -> SFmode truncation, clobber form, for mixed SSE/x87 math on
;; SSE2 when TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.
;; Operand 2 is an SFmode memory scratch; alternatives 0 and 4 do not need
;; it (constraint "X"), alternatives 1-3 do (constraint "m").
;; Alternative 4 marks its destination earlyclobber ("&"), so the
;; destination is not allocated on top of the source.
(define_insn "*truncdfsf2_mixed_sse2_nooverlap"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; use the popping form when the source
	 register dies in this insn.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    case 4:
      /* No direct template: this alternative has to be split.  */
      return "#";
    default:
      /* Alternatives 1-3 have no assembler template in this insn.  */
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
   (set_attr "mode" "SF,SF,SF,SF,DF")])

;; Form of the mixed SSE2 "nooverlap" truncation without a memory clobber
;; (the "_1" suffix).  Alternative 0 is the SSE register destination
;; (earlyclobber) and is emitted as "#" for later splitting; alternative 1
;; is an x87 store to memory.  The insn condition rejects the case where
;; both operands are memory.
(define_insn "*truncdfsf2_mixed_sse2_nooverlap_1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS 
   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
  switch (which_alternative)
    {
    case 0:
      /* No direct template: this alternative has to be split.  */
      return "#";
    case 1:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "ssecvt,fmov")
   (set_attr "mode" "DF,SF")])

;; DFmode -> SFmode truncation, clobber form, for mixed SSE/x87 math on
;; SSE2.  Same alternative layout as *truncdfsf2_mixed_sse2_nooverlap, but
;; the condition does not test TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS and the
;; SSE destination in alternative 4 is not earlyclobber.
;; Operand 2 is an SFmode memory scratch used by alternatives 1-3 only.
(define_insn "*truncdfsf2_mixed_sse2"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m#fxr,?f#xr,?r#fx,?x#fr,Y#fr")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    case 4:
      /* No direct template: this alternative has to be split.  */
      return "#";
    default:
      /* Alternatives 1-3 have no assembler template in this insn.  */
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
   (set_attr "mode" "SF,SF,SF,SF,DF")])

;; Form of the mixed SSE2 truncation without a memory clobber (the "_1"
;; suffix).  Alternatives 0 and 1 emit cvtsd2ss into an SSE register from a
;; register or register/memory source; alternative 2 is an x87 store to
;; memory.  The insn condition rejects the case where both operands are
;; memory.
(define_insn "*truncdfsf2_mixed_sse2_1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,Y,!m")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "Y,mY,f#Y")))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387
   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      /* Both SSE alternatives share one template; they differ only in
	 the athlon_decode attribute below.  */
      return "cvtsd2ss\t{%1, %0|%0, %1}";
    case 2:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "ssecvt,ssecvt,fmov")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "mode" "SF,SF,SF")])

;; DFmode -> SFmode truncation, clobber form, enabled whenever
;; TARGET_MIX_SSE_I387 holds (no TARGET_SSE2 test).  The source is always
;; an x87 register ("f").  Only alternative 0, a direct store to memory,
;; has an assembler template; alternatives 1-3 take the SFmode memory
;; scratch in operand 2 and reach abort () if they get as far as output.
(define_insn "*truncdfsf2_mixed_sse1"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf")
	(float_truncate:SF
	 (match_operand:DF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_MIX_SSE_I387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "SF")])

;; SSE-math DFmode -> SFmode truncation for targets where
;; TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.  The destination is an
;; earlyclobber SSE register.  The template is "#", so the insn is never
;; output directly and has to be split.
(define_insn "*truncdfsf2_sse_nooverlap"
  [(set (match_operand:SF 0 "register_operand" "=&Y")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "mY")))]
  "TARGET_SSE2 && TARGET_SSE_MATH
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS"
  "#"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "DF")])

;; Plain SSE-math DFmode -> SFmode truncation: a single cvtsd2ss into an
;; SSE register.  This is a named pattern; gen_truncdfsf2_sse is called
;; from the truncdfsf2 expander and from the SSE splitter.
;; The two alternatives share the template and differ only in the
;; athlon_decode attribute (register source vs. register-or-memory source).
(define_insn "truncdfsf2_sse"
  [(set (match_operand:SF 0 "register_operand" "=Y,Y")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "Y,mY")))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "cvtsd2ss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; x87 register-to-register DFmode -> SFmode "truncation" that is output as
;; an ordinary x87 register move via output_387_reg_move.  Only enabled
;; under flag_unsafe_math_optimizations.  Named pattern, generated by the
;; truncdfsf2 expander.
(define_insn "truncdfsf2_i387_noop"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
  "TARGET_80387 && flag_unsafe_math_optimizations"
{
  return output_387_reg_move (insn, operands);
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])

;; x87-only DFmode -> SFmode truncation, clobber form.  Alternative 0
;; stores straight to memory; alternatives 1 and 2 (x87 or general register
;; destination) take the SFmode memory scratch in operand 2 and have no
;; assembler template here, so they reach abort () if they get as far as
;; output.
(define_insn "*truncdfsf2_i387"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
	(float_truncate:SF
	 (match_operand:DF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "SF")])

;; x87-only DFmode -> SFmode truncation without a memory clobber (the "_1"
;; suffix): an x87 register stored directly to an SFmode memory operand.
(define_insn "*truncdfsf2_i387_1"
  [(set (match_operand:SF 0 "memory_operand" "=m")
	(float_truncate:SF
	 (match_operand:DF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Use the popping store only when the source register dies in this
     insn; otherwise keep it on the x87 stack.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
	  ? "fstp%z0\t%y0"
	  : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])

;; Post-reload split of the clobber form of DFmode -> SFmode truncation
;; when the destination is an SSE register and the source is not an x87
;; stack register.
;;
;; If TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is zero, the insn is simply
;; re-emitted as truncdfsf2_sse, dropping the clobber.  Otherwise:
; Avoid possible reformatting penalty on the destination by first
; zeroing it out
;; i.e. clear the whole V4SF view of the destination and then emit
;; cvtsd2ss into it.  That sequence aborts if destination and source
;; overlap.
(define_split
  [(set (match_operand:SF 0 "register_operand" "")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "")))
   (clobber (match_operand 2 "" ""))]
  "TARGET_SSE2 && TARGET_SSE_MATH
   && reload_completed
   && SSE_REG_P (operands[0]) && !STACK_REG_P (operands[1])"
  [(const_int 0)]
{
  rtx src, dest;
  if (!TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS)
    emit_insn (gen_truncdfsf2_sse (operands[0], operands[1]));
  else
    {
      /* View the operands in the vector modes used by the clear and
	 cvtsd2ss patterns below.  */
      dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
      src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
      /* simplify_gen_subreg refuses to widen memory references.  */
      if (GET_CODE (src) == SUBREG)
	alter_subreg (&src);
      /* Clearing the destination first would destroy an overlapping
	 source.  */
      if (reg_overlap_mentioned_p (operands[0], operands[1]))
	abort ();
      emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
      emit_insn (gen_cvtsd2ss (dest, dest, src));
    }
  DONE;
})

;; Post-reload split of the no-clobber form of DFmode -> SFmode truncation
;; into an SSE register, used only when
;; TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS is set.  Clears the whole V4SF view
;; of the destination, then emits cvtsd2ss into it.  Aborts if destination
;; and source overlap.
(define_split
  [(set (match_operand:SF 0 "register_operand" "")
	(float_truncate:SF
	 (match_operand:DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2 && TARGET_SSE_MATH
   && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS
   && reload_completed
   && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx src, dest;
  /* View the operands in the vector modes used by the clear and
     cvtsd2ss patterns below.  */
  dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
  src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
  /* simplify_gen_subreg refuses to widen memory references.  */
  if (GET_CODE (src) == SUBREG)
    alter_subreg (&src);
  /* Clearing the destination first would destroy an overlapping
     source.  */
  if (reg_overlap_mentioned_p (operands[0], operands[1]))
    abort ();
  emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
  emit_insn (gen_cvtsd2ss (dest, dest, src));
  DONE;
})

;; Post-reload split of the clobber form with an x87 source
;; (fp_register_operand) and a register destination: truncate into the
;; SFmode memory scratch (operand 2), then load the destination from that
;; scratch.
(define_split
  [(set (match_operand:SF 0 "register_operand" "")
	(float_truncate:SF
	 (match_operand:DF 1 "fp_register_operand" "")))
   (clobber (match_operand:SF 2 "memory_operand" ""))]
  "TARGET_80387
   && reload_completed"
  [(set (match_dup 2) (float_truncate:SF (match_dup 1)))
   (set (match_dup 0) (match_dup 2))]
  "")

;; Split of the clobber form when the destination is already memory: the
;; scratch in operand 2 is not needed, so the clobber is dropped and only
;; the direct truncating store remains.  Unlike the register-destination
;; split, the condition does not require reload_completed.
(define_split
  [(set (match_operand:SF 0 "memory_operand" "")
	(float_truncate:SF
	 (match_operand:DF 1 "register_operand" "")))
   (clobber (match_operand:SF 2 "memory_operand" ""))]
  "TARGET_80387"
  [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
  "")

;; Conversion from XFmode to SFmode.

;; Expander for XFmode -> SFmode truncation (x87 only).
;;
;; Under flag_unsafe_math_optimizations the truncation is emitted as
;; truncxfsf2_i387_noop into a register (a fresh pseudo when operand 0 is
;; not a register, followed by a move to operand 0).  Otherwise the
;; parallel below is used, with operand 2 set to the SFmode location
;; returned by assign_386_stack_local; the matching insns use it as a
;; memory scratch.
(define_expand "truncxfsf2"
  [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
		   (float_truncate:SF
		    (match_operand:XF 1 "register_operand" "")))
	      (clobber (match_dup 2))])]
  "TARGET_80387"
  "
  if (flag_unsafe_math_optimizations)
    {
      /* The noop pattern needs a register destination.  */
      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode);
      emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1]));
      if (reg != operands[0])
	emit_move_insn (operands[0], reg);
      DONE;
    }
  else
    operands[2] = assign_386_stack_local (SFmode, 0);
  ")

;; XFmode -> SFmode truncation, clobber form, for mixed SSE/x87 math.
;; Alternative 0 is a direct x87 store to memory.  Alternatives 1-3 (x87,
;; general or SSE register destination) take the SFmode memory scratch in
;; operand 2 and have no assembler template here.
(define_insn "*truncxfsf2_mixed"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf")
	(float_truncate:SF
	 (match_operand:XF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_MIX_SSE_I387"
{
  /* Only the memory-destination alternative can be output directly.  */
  if (which_alternative != 0)
    abort ();

  /* Pop the x87 stack only when the source register dies here.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
	  ? "fstp%z0\t%y0"
	  : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "SF")])

;; x87 register-to-register XFmode -> SFmode "truncation" that is output as
;; an ordinary x87 register move via output_387_reg_move.  Only enabled
;; under flag_unsafe_math_optimizations.  Named pattern, generated by the
;; truncxfsf2 expander.
(define_insn "truncxfsf2_i387_noop"
  [(set (match_operand:SF 0 "register_operand" "=f")
	(float_truncate:SF (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387 && flag_unsafe_math_optimizations"
{
  return output_387_reg_move (insn, operands);
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])

;; x87-only XFmode -> SFmode truncation, clobber form.  Alternative 0
;; stores straight to memory; alternatives 1 and 2 (x87 or general register
;; destination) take the SFmode memory scratch in operand 2 and have no
;; assembler template here, so they reach abort () if they get as far as
;; output.
(define_insn "*truncxfsf2_i387"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
	(float_truncate:SF
	 (match_operand:XF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "SF")])

;; x87-only XFmode -> SFmode truncation without a memory clobber (the "_1"
;; suffix): an x87 register stored directly to an SFmode memory operand.
(define_insn "*truncxfsf2_i387_1"
  [(set (match_operand:SF 0 "memory_operand" "=m")
	(float_truncate:SF
	 (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Use the popping store only when the source register dies in this
     insn; otherwise keep it on the x87 stack.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
	  ? "fstp%z0\t%y0"
	  : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "SF")])

;; Post-reload split of the XFmode -> SFmode clobber form with a register
;; destination: truncate into the SFmode memory scratch (operand 2), then
;; load the destination from that scratch.
(define_split
  [(set (match_operand:SF 0 "register_operand" "")
	(float_truncate:SF
	 (match_operand:XF 1 "register_operand" "")))
   (clobber (match_operand:SF 2 "memory_operand" ""))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 2) (float_truncate:SF (match_dup 1)))
   (set (match_dup 0) (match_dup 2))]
  "")

;; Split of the XFmode -> SFmode clobber form when the destination is
;; already memory: the scratch in operand 2 is not needed, so the clobber
;; is dropped and only the direct truncating store remains.  The condition
;; does not require reload_completed.
(define_split
  [(set (match_operand:SF 0 "memory_operand" "")
	(float_truncate:SF
	 (match_operand:XF 1 "register_operand" "")))
   (clobber (match_operand:SF 2 "memory_operand" ""))]
  "TARGET_80387"
  [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
  "")

;; Conversion from XFmode to DFmode.

;; Expander for XFmode -> DFmode truncation (x87 only).
;;
;; Under flag_unsafe_math_optimizations the truncation is emitted as
;; truncxfdf2_i387_noop into a register (a fresh pseudo when operand 0 is
;; not a register, followed by a move to operand 0).  Otherwise the
;; parallel below is used, with operand 2 set to the DFmode location
;; returned by assign_386_stack_local; the matching insns use it as a
;; memory scratch.
(define_expand "truncxfdf2"
  [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
		   (float_truncate:DF
		    (match_operand:XF 1 "register_operand" "")))
	      (clobber (match_dup 2))])]
  "TARGET_80387"
  "
  if (flag_unsafe_math_optimizations)
    {
      /* The noop pattern needs a register destination.  */
      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode);
      emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1]));
      if (reg != operands[0])
	emit_move_insn (operands[0], reg);
      DONE;
    }
  else
    operands[2] = assign_386_stack_local (DFmode, 0);
  ")

;; XFmode -> DFmode truncation, clobber form, for mixed SSE/x87 math on
;; SSE2.  Alternative 0 is a direct x87 store to memory.  Alternatives 1-3
;; (x87, general or SSE2 register destination) take the DFmode memory
;; scratch in operand 2 and have no assembler template here, so they reach
;; abort () if they get as far as output.
(define_insn "*truncxfdf2_mixed"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf")
	(float_truncate:DF
	 (match_operand:XF 1 "register_operand" "f,f,f,f")))
   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi,multi")
   (set_attr "mode" "DF")])

;; x87 register-to-register XFmode -> DFmode "truncation" that is output as
;; an ordinary x87 register move via output_387_reg_move.  Only enabled
;; under flag_unsafe_math_optimizations.  Named pattern, generated by the
;; truncxfdf2 expander.
(define_insn "truncxfdf2_i387_noop"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(float_truncate:DF (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387 && flag_unsafe_math_optimizations"
{
  return output_387_reg_move (insn, operands);
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")])

;; x87-only XFmode -> DFmode truncation, clobber form.  Alternative 0
;; stores straight to memory; alternatives 1 and 2 (x87 or general register
;; destination) take the DFmode memory scratch in operand 2 and have no
;; assembler template here, so they reach abort () if they get as far as
;; output.
(define_insn "*truncxfdf2_i387"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
	(float_truncate:DF
	 (match_operand:XF 1 "register_operand" "f,f,f")))
   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
  "TARGET_80387"
{
  switch (which_alternative)
    {
    case 0:
      /* x87 store to memory; pop when the source register dies here.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	return "fst%z0\t%y0";
    default:
      abort ();
    }
}
  [(set_attr "type" "fmov,multi,multi")
   (set_attr "mode" "DF")])

;; x87-only XFmode -> DFmode truncation without a memory clobber (the "_1"
;; suffix): an x87 register stored directly to a DFmode memory operand.
(define_insn "*truncxfdf2_i387_1"
  [(set (match_operand:DF 0 "memory_operand" "=m")
	(float_truncate:DF
	  (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387"
{
  /* Use the popping store only when the source register dies in this
     insn; otherwise keep it on the x87 stack.  */
  return (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
	  ? "fstp%z0\t%y0"
	  : "fst%z0\t%y0");
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")])

;; Post-reload split of the XFmode -> DFmode clobber form with a register
;; destination: truncate into the DFmode memory scratch (operand 2), then
;; load the destination from that scratch.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
	(float_truncate:DF
	 (match_operand:XF 1 "register_operand" "")))
   (clobber (match_operand:DF 2 "memory_operand" ""))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 2) (float_truncate:DF (match_dup 1)))
   (set (match_dup 0) (match_dup 2))]
  "")

;; Split of the XFmode -> DFmode clobber form when the destination is
;; already memory: the scratch in operand 2 is not needed, so the clobber
;; is dropped and only the direct truncating store remains.  The condition
;; does not require reload_completed.
(define_split
  [(set (match_operand:DF 0 "memory_operand" "")
	(float_truncate:DF
	 (match_operand:XF 1 "register_operand" "")))
   (clobber (match_operand:DF 2 "memory_operand" ""))]
  "TARGET_80387"
  [(set (match_dup 0) (float_truncate:DF (match_dup 1)))]
  "")


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]