[PATCH] Fix PR25620, pow() expansion missed-optimization, 2nd try

Richard Guenther rguenther@suse.de
Mon Nov 13 17:00:00 GMT 2006


On Mon, 13 Nov 2006, Richard Guenther wrote:

> On Sun, 12 Nov 2006, Roger Sayle wrote:
> 
> > 
> > Hi Richard,
> > 
> > On Sun, 12 Nov 2006, Richard Guenther wrote:
> > > Bootstrapped and regtested on x86_64-unknown-linux-gnu.  SPEC 2k
> > > doesn't show a difference, Polyhedron has an improvement for air
> > > (down from 21s to 16.5s runtime).
> > 
> > Cool!  I hadn't appreciated this had such a significant impact on
> > polyhedron.
> > 
> > 
> > This is much better, thanks.  However, there are still a number of
> > (new) issues that need to be resolved.
> > 
> > The first is a potential bug...
> > 
> > > ! 	      expand_simple_binop (mode, MULT, op, op2, op, 0, OPTAB_DIRECT);
> > 
> > The expand_binop, and expand_simple_binop, APIs treat their target
> > argument as a hint.  Hence the return result of expand_simple_binop
> > isn't guaranteed to be in target_rtx.  Hence you need to use
> > 
> > 	op = expand_simple_binop (....)
> 
> Whoops.  Looks like I have to fix some of the i386 rounding expanders
> as well here...

I was convinced that for correctness these are already ok as they will
not fail to use the target register passed.  Still for consistency the
patch would look like below.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I verified it creates the same code as before.

Ok for mainline?

Richard.

2006-11-13  Richard Guenther  <rguenther@suse.de>

	* config/i386/i386.c (ix86_expand_lround): Handle expand_simple_binop
	return value.
	(ix86_expand_lfloorceil): Likewise.
	(ix86_expand_rint): Likewise.
	(ix86_expand_floorceildf_32): Likewise.
	(ix86_expand_floorceil): Likewise.
	(ix86_expand_rounddf_32): Likewise.
	(ix86_expand_truncdf_32): Likewise.
	(ix86_expand_round): Likewise.

Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 118752)
--- config/i386/i386.c	(working copy)
*************** ix86_expand_lround (rtx op0, rtx op1)
*** 19448,19454 ****
    ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
  
    /* adj = op1 + adj */
!   expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
  
    /* op0 = (imode)adj */
    expand_fix (op0, adj, 0);
--- 19448,19454 ----
    ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
  
    /* adj = op1 + adj */
!   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
  
    /* op0 = (imode)adj */
    expand_fix (op0, adj, 0);
*************** ix86_expand_lfloorceil (rtx op0, rtx op1
*** 19466,19472 ****
     */
    enum machine_mode fmode = GET_MODE (op1);
    enum machine_mode imode = GET_MODE (op0);
!   rtx ireg, freg, label;
  
    /* reg = (long)op1 */
    ireg = gen_reg_rtx (imode);
--- 19466,19472 ----
     */
    enum machine_mode fmode = GET_MODE (op1);
    enum machine_mode imode = GET_MODE (op0);
!   rtx ireg, freg, label, tmp;
  
    /* reg = (long)op1 */
    ireg = gen_reg_rtx (imode);
*************** ix86_expand_lfloorceil (rtx op0, rtx op1
*** 19479,19486 ****
    /* ireg = (freg > op1) ? ireg - 1 : ireg */
    label = ix86_expand_sse_compare_and_jump (UNLE,
  					    freg, op1, !do_floor);
!   expand_simple_binop (imode, do_floor ? MINUS : PLUS,
!                        ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
    emit_label (label);
    LABEL_NUSES (label) = 1;
  
--- 19479,19488 ----
    /* ireg = (freg > op1) ? ireg - 1 : ireg */
    label = ix86_expand_sse_compare_and_jump (UNLE,
  					    freg, op1, !do_floor);
!   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
! 			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
!   emit_move_insn (ireg, tmp);
! 
    emit_label (label);
    LABEL_NUSES (label) = 1;
  
*************** ix86_expand_rint (rtx operand0, rtx oper
*** 19512,19519 ****
    TWO52 = ix86_gen_TWO52 (mode);
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
!   expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  
    ix86_sse_copysign_to_positive (res, xa, res, mask);
  
--- 19514,19521 ----
    TWO52 = ix86_gen_TWO52 (mode);
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
!   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
!   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  
    ix86_sse_copysign_to_positive (res, xa, res, mask);
  
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19559,19566 ****
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* xa = xa + TWO52 - TWO52; */
!   expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  
    /* xa = copysign (xa, operand1) */
    ix86_sse_copysign_to_positive (xa, xa, res, mask);
--- 19561,19568 ----
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* xa = xa + TWO52 - TWO52; */
!   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
!   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  
    /* xa = copysign (xa, operand1) */
    ix86_sse_copysign_to_positive (xa, xa, res, mask);
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19575,19582 ****
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
    /* We always need to subtract here to preserve signed zero.  */
!   expand_simple_binop (mode, MINUS,
!                        xa, tmp, res, 0, OPTAB_DIRECT);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
--- 19577,19585 ----
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
    /* We always need to subtract here to preserve signed zero.  */
!   tmp = expand_simple_binop (mode, MINUS,
! 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
!   emit_move_insn (res, tmp);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19632,19639 ****
    tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   expand_simple_binop (mode, do_floor ? MINUS : PLUS,
!                        xa, tmp, res, 0, OPTAB_DIRECT);
  
    if (HONOR_SIGNED_ZEROS (mode))
      ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
--- 19635,19643 ----
    tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
! 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
!   emit_move_insn (res, tmp);
  
    if (HONOR_SIGNED_ZEROS (mode))
      ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
*************** ix86_expand_rounddf_32 (rtx operand0, rt
*** 19683,19702 ****
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* xa2 = xa + TWO52 - TWO52; */
!   xa2 = gen_reg_rtx (mode);
!   expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
  
    /* dxa = xa2 - xa; */
!   dxa = gen_reg_rtx (mode);
!   expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);
  
    /* generate 0.5, 1.0 and -0.5 */
    half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
!   one = gen_reg_rtx (mode);
!   expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
!   mhalf = gen_reg_rtx (mode);
!   expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);
  
    /* Compensate.  */
    tmp = gen_reg_rtx (mode);
--- 19687,19703 ----
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* xa2 = xa + TWO52 - TWO52; */
!   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
!   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
  
    /* dxa = xa2 - xa; */
!   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
  
    /* generate 0.5, 1.0 and -0.5 */
    half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
!   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
!   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
! 			       0, OPTAB_DIRECT);
  
    /* Compensate.  */
    tmp = gen_reg_rtx (mode);
*************** ix86_expand_rounddf_32 (rtx operand0, rt
*** 19704,19715 ****
    tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
    /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
    tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
  
    /* res = copysign (xa2, operand1) */
    ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
--- 19705,19716 ----
    tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
    /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
    tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  
    /* res = copysign (xa2, operand1) */
    ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
*************** void
*** 19770,19776 ****
  ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
  {
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, mask, TWO52, label, one, res, smask;
  
    /* C code for SSE variant we expand below.
          double xa = fabs (x), x2;
--- 19771,19777 ----
  ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
  {
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, mask, TWO52, label, one, res, smask, tmp;
  
    /* C code for SSE variant we expand below.
          double xa = fabs (x), x2;
*************** ix86_expand_truncdf_32 (rtx operand0, rt
*** 19798,19805 ****
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* res = xa + TWO52 - TWO52; */
!   expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
  
    /* generate 1.0 */
    one = force_reg (mode, const_double_from_real_value (dconst1, mode));
--- 19799,19807 ----
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
    /* res = xa + TWO52 - TWO52; */
!   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
!   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
!   emit_move_insn (res, tmp);
  
    /* generate 1.0 */
    one = force_reg (mode, const_double_from_real_value (dconst1, mode));
*************** ix86_expand_truncdf_32 (rtx operand0, rt
*** 19808,19815 ****
    mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
    emit_insn (gen_rtx_SET (VOIDmode, mask,
                            gen_rtx_AND (mode, mask, one)));
!   expand_simple_binop (mode, MINUS,
!                        res, mask, res, 0, OPTAB_DIRECT);
  
    /* res = copysign (res, operand1) */
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
--- 19810,19818 ----
    mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
    emit_insn (gen_rtx_SET (VOIDmode, mask,
                            gen_rtx_AND (mode, mask, one)));
!   tmp = expand_simple_binop (mode, MINUS,
! 			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
!   emit_move_insn (res, tmp);
  
    /* res = copysign (res, operand1) */
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
*************** ix86_expand_round (rtx operand0, rtx ope
*** 19853,19859 ****
  
    /* xa = xa + 0.5 */
    half = force_reg (mode, const_double_from_real_value (pred_half, mode));
!   expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);
  
    /* xa = (double)(int64_t)xa */
    xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
--- 19856,19862 ----
  
    /* xa = xa + 0.5 */
    half = force_reg (mode, const_double_from_real_value (pred_half, mode));
!   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
  
    /* xa = (double)(int64_t)xa */
    xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);



More information about the Gcc-patches mailing list