Convert SSE unsigned fp->int to a post-reload splitter

Richard Henderson rth@redhat.com
Tue Feb 13 01:34:00 GMT 2007


One piece at a time.

This allows the SSE expansion that Apple contributed to be used
anywhere, not just when TARGET_KEEPS_VECTOR_ALIGNED_STACK is set,
because the vector operations are never exposed to reload, and so
we never have to spill a vector temporary.

Tested on pentium4-linux.


r~


        * config/i386/i386.md (*fixuns_trunc<SSEMODEF>_1): New insn.
        (fixuns_trunc<SSEMODEF>si2): Use it.
        * config/i386/sse.md (vec_setv4sf_0): Export.
        * config/i386/i386.c (ix86_build_const_vector): Export.
        (ix86_split_convert_uns_si_sse): Rename from
        ix86_expand_convert_uns_si_sse and rewrite as a splitter.
        * config/i386/i386-protos.h: Update.

Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 121859)
+++ gcc/config/i386/i386.md	(working copy)
@@ -488,6 +488,9 @@
 ;; SSE asm suffix for floating point modes
 (define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
 
+;; SSE vector mode corresponding to a scalar mode
+(define_mode_attr ssevecmode
+  [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
 
 ;; Scheduling descriptions
 
@@ -4334,12 +4337,39 @@
 ;; Unsigned conversion to SImode.
 
 (define_expand "fixuns_trunc<mode>si2"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-	           (fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
-  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
-   && TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (unsigned_fix:SI
+	    (match_operand:SSEMODEF 1 "nonimmediate_operand" "")))
+     (use (match_dup 2))
+     (clobber (match_scratch:<ssevecmode> 3 ""))
+     (clobber (match_scratch:<ssevecmode> 4 ""))])]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH && !optimize_size"
+{
+  enum machine_mode mode = <MODE>mode;
+  enum machine_mode vecmode = <ssevecmode>mode;
+  REAL_VALUE_TYPE TWO31r;
+  rtx two31;
+
+  real_ldexp (&TWO31r, &dconst1, 31);
+  two31 = const_double_from_real_value (TWO31r, mode);
+  two31 = ix86_build_const_vector (mode, true, two31);
+  operands[2] = force_reg (vecmode, two31);
+})
+
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+	(unsigned_fix:SI
+	  (match_operand:SSEMODEF 3 "nonimmediate_operand" "xm,xm")))
+   (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
+   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH && !optimize_size"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
 {
-  ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
+  ix86_split_convert_uns_si_sse (operands);
   DONE;
 })
 
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md	(revision 121859)
+++ gcc/config/i386/sse.md	(working copy)
@@ -1367,7 +1367,7 @@
   DONE;
 })
 
-(define_insn "*vec_setv4sf_0"
+(define_insn "vec_setv4sf_0"
   [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y2,m")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc/config/i386/i386-protos.h	(revision 121859)
+++ gcc/config/i386/i386-protos.h	(working copy)
@@ -89,7 +89,8 @@ extern void ix86_expand_binary_operator 
 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
 extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
 					rtx[]);
-extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
+extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
+extern void ix86_split_convert_uns_si_sse (rtx[]);
 extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 121859)
+++ gcc/config/i386/i386.c	(working copy)
@@ -1519,7 +1519,6 @@ static const char *ix86_mangle_fundament
 static tree ix86_stack_protect_fail (void);
 static rtx ix86_internal_arg_pointer (void);
 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
-static rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
 						 rtx, rtx, int);
 
@@ -9865,58 +9864,64 @@ ix86_unary_operator_ok (enum rtx_code co
   return TRUE;
 }
 
-/* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
-   When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
-   conversion, and ignoring the upper 32 bits of the result.  On x86_64,
-   there is an equivalent SSE %xmm->signed-int-64 conversion.
-
-   On x86_32, we don't have the instruction, nor the 64-bit destination
-   register it requires.  Do the conversion inline in the SSE registers.
-   Requires SSE2.  For x86_32, -mfpmath=sse, !optimize_size only.  */
+/* Post-reload splitter for converting an SF or DFmode value in an
+   SSE register into an unsigned SImode.  */
 
 void
-ix86_expand_convert_uns_si_sse (rtx target, rtx input)
+ix86_split_convert_uns_si_sse (rtx operands[])
 {
-  REAL_VALUE_TYPE TWO31r;
-  enum machine_mode mode, vecmode;
-  rtx two31, value, large, sign, result_vec, zero_or_two31, x;
+  enum machine_mode vecmode;
+  rtx value, large, zero_or_two31, input, two31, x;
 
-  mode = GET_MODE (input);
-  vecmode = mode == SFmode ? V4SFmode : V2DFmode;
+  large = operands[1];
+  zero_or_two31 = operands[2];
+  input = operands[3];
+  two31 = operands[4];
+  vecmode = GET_MODE (large);
+  value = gen_rtx_REG (vecmode, REGNO (operands[0]));
 
-  real_ldexp (&TWO31r, &dconst1, 31);
-  two31 = const_double_from_real_value (TWO31r, mode);
-  two31 = ix86_build_const_vector (mode, true, two31);
-  two31 = force_reg (vecmode, two31);
+  /* Load up the value into the low element.  We must ensure that the other
+     elements are valid floats -- zero is the easiest such value.  */
+  if (MEM_P (input))
+    {
+      if (vecmode == V4SFmode)
+	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
+      else
+	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
+    }
+  else
+    {
+      input = gen_rtx_REG (vecmode, REGNO (input));
+      emit_move_insn (value, CONST0_RTX (vecmode));
+      if (vecmode == V4SFmode)
+	emit_insn (gen_sse_movss (value, value, input));
+      else
+	emit_insn (gen_sse2_movsd (value, value, input));
+    }
 
-  value = gen_reg_rtx (vecmode);
-  ix86_expand_vector_init_one_nonzero (false, vecmode, value, input, 0);
+  emit_move_insn (large, two31);
+  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
 
-  large = gen_reg_rtx (vecmode);
-  x = gen_rtx_fmt_ee (LE, vecmode, two31, value);
+  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
   emit_insn (gen_rtx_SET (VOIDmode, large, x));
 
-  zero_or_two31 = gen_reg_rtx (vecmode);
-  x = gen_rtx_AND (vecmode, large, two31);
+  x = gen_rtx_AND (vecmode, zero_or_two31, large);
   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
 
   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
   emit_insn (gen_rtx_SET (VOIDmode, value, x));
 
-  result_vec = gen_reg_rtx (V4SImode);
-  if (mode == SFmode)
-    x = gen_sse2_cvttps2dq (result_vec, value);
-  else
-    x = gen_sse2_cvttpd2dq (result_vec, value);
-  emit_insn (x);
-
-  sign = gen_reg_rtx (V4SImode);
-  emit_insn (gen_ashlv4si3 (sign, gen_lowpart (V4SImode, large),
-			    GEN_INT (31)));
+  large = gen_rtx_REG (V4SImode, REGNO (large));
+  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
 
-  emit_insn (gen_xorv4si3 (result_vec, result_vec, sign));
+  x = gen_rtx_REG (V4SImode, REGNO (value));
+  if (vecmode == V4SFmode)
+    emit_insn (gen_sse2_cvttps2dq (x, value));
+  else
+    emit_insn (gen_sse2_cvttpd2dq (x, value));
+  value = x;
 
-  ix86_expand_vector_extract (false, target, result_vec, 0);
+  emit_insn (gen_xorv4si3 (value, value, large));
 }
 
 /* Convert an unsigned DImode value into a DFmode, using only SSE.
@@ -10066,7 +10071,7 @@ ix86_expand_convert_uns_sisf_sse (rtx ta
    then replicate the value for all elements of the vector
    register.  */
 
-static rtx
+rtx
 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
 {
   rtvec v;



More information about the Gcc-patches mailing list