This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PING][PATCH] Expand lround inline for x86_64/i?86 SSE math


On Thu, 19 Oct 2006, Richard Guenther wrote:

> On Wed, 18 Oct 2006, Geert Bosch wrote:
> 
> > 
> > On Oct 18, 2006, at 12:22, Richard Guenther wrote:
> > 
> > >which would be fine (as we can restrict the inline expansion to
> > >-fno-rounding-math then, which is the default).  Now on to a way
> > >to tell real.c to do nextafter (0.5, 0.0) ...
> > 
> > You can lift that code from ada/trans.c:
> > 
> >      /* Compute the exact value calc_type'Pred (0.5) at compile time. */
> >      fmt = REAL_MODE_FORMAT (TYPE_MODE (calc_type));
> >      real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
> >      REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf,
> >                       half_minus_pred_half);
> >      gnu_pred_half = build_real (calc_type, pred_half);
> 
> Thanks, this did it.  Here is an updated patch, bootstrapped and tested on
> x86_64-unknown-linux-gnu.
> 
> Ok for 4.3?

I re-tested the patch below on its own, on top of the already applied
patches.

Ok for mainline?

Thanks,
Richard.


> 2006-08-22  Richard Guenther  <rguenther@suse.de>
> 
> 	* builtins.c (expand_builtin_int_roundingfn_2): Expand
> 	BUILT_IN_LROUND and BUILT_IN_LLROUND from here.
> 	(expand_builtin): Adjust likewise.
> 	* genopinit.c (optabs[]): Add lround optab.
> 	* optabs.c (init_optabs): Initialize lround_optab.
> 	* optabs.h (enum convert_optab_index): Add COI_lround.
> 	(lround_optab): Define.
> 	* config/i386/i386-protos.h (ix86_expand_lround): Declare.
> 	* config/i386/i386.c (ix86_sse_copysign_to_positive): New
> 	static function.
> 	(ix86_expand_lround): New function.
> 	* config/i386/i386.md (lround<mode>di2, lround<mode>si2):
> 	New expanders.
> 	* doc/md.texi (lroundMN2): Document.
> 
> 	* gcc.target/i386/math-torture/lround.c: New testcase.
> 
> 
> Index: gcc/builtins.c
> ===================================================================
> --- gcc.orig/builtins.c
> +++ gcc/builtins.c
> @@ -2340,6 +2340,9 @@ expand_builtin_int_roundingfn_2 (tree ex
>      CASE_FLT_FN (BUILT_IN_LRINT):
>      CASE_FLT_FN (BUILT_IN_LLRINT):
>        builtin_optab = lrint_optab; break;
> +    CASE_FLT_FN (BUILT_IN_LROUND):
> +    CASE_FLT_FN (BUILT_IN_LLROUND):
> +      builtin_optab = lround_optab; break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -5808,6 +5811,8 @@ expand_builtin (tree exp, rtx target, rt
>  
>      CASE_FLT_FN (BUILT_IN_LRINT):
>      CASE_FLT_FN (BUILT_IN_LLRINT):
> +    CASE_FLT_FN (BUILT_IN_LROUND):
> +    CASE_FLT_FN (BUILT_IN_LLROUND):
>        target = expand_builtin_int_roundingfn_2 (exp, target, subtarget);
>        if (target)
>  	return target;
> Index: gcc/genopinit.c
> ===================================================================
> --- gcc.orig/genopinit.c
> +++ gcc/genopinit.c
> @@ -128,6 +128,7 @@ static const char * const optabs[] =
>    "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
>    "rint_optab->handlers[$A].insn_code = CODE_FOR_$(rint$a2$)",
>    "lrint_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lrint$F$a$I$b2$)",
> +  "lround_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lround$F$a$I$b2$)",
>    "sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
>    "sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
>    "asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
> Index: gcc/optabs.c
> ===================================================================
> --- gcc.orig/optabs.c
> +++ gcc/optabs.c
> @@ -5365,6 +5365,7 @@ init_optabs (void)
>    sfloat_optab = init_convert_optab (FLOAT);
>    ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
>    lrint_optab = init_convert_optab (UNKNOWN);
> +  lround_optab = init_convert_optab (UNKNOWN);
>  
>    for (i = 0; i < NUM_MACHINE_MODES; i++)
>      {
> @@ -5486,6 +5487,8 @@ init_optabs (void)
>  				 MODE_INT, MODE_DECIMAL_FLOAT);
>    init_interclass_conv_libfuncs (lrint_optab, "lrint",
>  				 MODE_INT, MODE_FLOAT);
> +  init_interclass_conv_libfuncs (lround_optab, "lround",
> +				 MODE_INT, MODE_FLOAT);
>  
>    /* sext_optab is also used for FLOAT_EXTEND.  */
>    init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
> Index: gcc/optabs.h
> ===================================================================
> --- gcc.orig/optabs.h
> +++ gcc/optabs.h
> @@ -406,6 +406,7 @@ enum convert_optab_index
>    COI_ufloat,
>  
>    COI_lrint,
> +  COI_lround,
>  
>    COI_MAX
>  };
> @@ -422,6 +423,7 @@ extern GTY(()) convert_optab convert_opt
>  #define sfloat_optab (convert_optab_table[COI_sfloat])
>  #define ufloat_optab (convert_optab_table[COI_ufloat])
>  #define lrint_optab (convert_optab_table[COI_lrint])
> +#define lround_optab (convert_optab_table[COI_lround])
>  
>  /* These arrays record the insn_code of insns that may be needed to
>     perform input and output reloads of special objects.  They provide a
> Index: gcc/config/i386/i386-protos.h
> ===================================================================
> --- gcc.orig/config/i386/i386-protos.h
> +++ gcc/config/i386/i386-protos.h
> @@ -157,6 +157,8 @@ extern void ix86_emit_i387_log1p (rtx, r
>  
>  extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
>  
> +extern void ix86_expand_lround (rtx, rtx);
> +
>  #ifdef TREE_CODE
>  extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
>  extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc.orig/config/i386/i386.c
> +++ gcc/config/i386/i386.c
> @@ -19014,5 +19014,57 @@ asm_preferred_eh_data_format (int code, 
>      return DW_EH_PE_udata4;
>    return DW_EH_PE_absptr;
>  }
> +
> +/* Expand copysign from SIGN to the positive value ABS_VALUE
> +   storing in RESULT.  */
> +static void
> +ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
> +{
> +  enum machine_mode mode = GET_MODE (sign);
> +  rtx sgn = gen_reg_rtx (mode);
> +  rtx mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
> +  if (!VECTOR_MODE_P (mode))
> +    {
> +      /* We need to generate a scalar mode mask in this case.  */
> +      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
> +      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
> +      mask = gen_reg_rtx (mode);
> +      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
> +    }
> +  emit_insn (gen_rtx_SET (VOIDmode, sgn,
> +			  gen_rtx_AND (mode, mask, sign)));
> +  emit_insn (gen_rtx_SET (VOIDmode, result,
> +			  gen_rtx_IOR (mode, abs_value, sgn)));
> +}
> +
> +/* Expand SSE sequence for computing lround from OP1 storing
> +   into OP0.  */
> +void
> +ix86_expand_lround (rtx op0, rtx op1)
> +{
> +  /* C code for the stuff we're doing below:
> +       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
> +       return (long)tmp;
> +   */
> +  enum machine_mode mode = GET_MODE (op1);
> +  const struct real_format *fmt;
> +  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
> +  rtx adj;
> +
> +  /* load nextafter (0.5, 0.0) */
> +  fmt = REAL_MODE_FORMAT (mode);
> +  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
> +  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
> +
> +  /* adj = copysign (0.5, op1) */
> +  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
> +  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1));
> +
> +  /* adj = op1 + adj */
> +  expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
> +
> +  /* op0 = (imode)adj */
> +  expand_fix (op0, adj, 0);
> +}
>  
>  #include "gt-i386.h"
> Index: gcc/config/i386/i386.md
> ===================================================================
> --- gcc.orig/config/i386/i386.md
> +++ gcc/config/i386/i386.md
> @@ -17341,6 +17341,26 @@
>    "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
>    "")
>  
> +(define_expand "lround<mode>di2"
> +  [(match_operand:DI 0 "nonimmediate_operand" "")
> +   (match_operand:SSEMODEF 1 "register_operand" "")]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
> +   && !flag_trapping_math && !flag_rounding_math"
> +{
> +  ix86_expand_lround (operand0, operand1);
> +  DONE;
> +})
> +
> +(define_expand "lround<mode>si2"
> +  [(match_operand:SI 0 "nonimmediate_operand" "")
> +   (match_operand:SSEMODEF 1 "register_operand" "")]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +   && !flag_trapping_math && !flag_rounding_math"
> +{
> +  ix86_expand_lround (operand0, operand1);
> +  DONE;
> +})
> +
>  ;; Rounding mode control word calculation could clobber FLAGS_REG.
>  (define_insn_and_split "frndintxf2_floor"
>    [(set (match_operand:XF 0 "register_operand" "=f")
> Index: gcc/testsuite/gcc.target/i386/math-torture/lround.c
> ===================================================================
> --- /dev/null
> +++ gcc/testsuite/gcc.target/i386/math-torture/lround.c
> @@ -0,0 +1,26 @@
> +/* { dg-do assemble } */
> +
> +long testlf (float x)
> +{
> +  return __builtin_lroundf (x);
> +}
> +long testl (double x)
> +{
> +  return __builtin_lround (x);
> +}
> +long testll (long double x)
> +{
> +  return __builtin_lroundl (x);
> +}
> +long long testllf (float x)
> +{
> +  return __builtin_llroundf (x);
> +}
> +long long testll_ (double x)
> +{
> +  return __builtin_llround (x);
> +}
> +long long testlll (long double x)
> +{
> +  return __builtin_llroundl (x);
> +}
> Index: gcc/doc/md.texi
> ===================================================================
> --- gcc.orig/doc/md.texi
> +++ gcc/doc/md.texi
> @@ -3607,6 +3607,12 @@ Convert operand 1 (valid for floating po
>  point mode @var{n} as a signed number according to the current
>  rounding mode and store in operand 0 (which has mode @var{n}).
>  
> +@cindex @code{lround@var{m}@var{n}2}
> +@item @samp{lround@var{m}@var{n}2}
> +Convert operand 1 (valid for floating point mode @var{m}) to fixed
> +point mode @var{n} as a signed number rounding to nearest and away
> +from zero and store in operand 0 (which has mode @var{n}).
> +
>  @cindex @code{copysign@var{m}3} instruction pattern
>  @item @samp{copysign@var{m}3}
>  Store a value with the magnitude of operand 1 and the sign of operand
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]