This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Bad choices by expand_mult_highpart

From: Richard Sandiford <rsandifo at redhat dot com>
To: gcc-patches at gcc dot gnu dot org
Date: Thu, 18 Mar 2004 12:51:09 +0000
Subject: Bad choices by expand_mult_highpart
expand_mult_highpart tries several ways of multiplying by a constant.
Unfortunately, the first of these is:

  /* expand_mult handles constant multiplication of word_mode
     or narrower.  It does a poor job for large modes.  */
  if (size < BITS_PER_WORD
      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
    {
      /* We have to do this, since expand_binop doesn't do conversion for
	 multiply.  Maybe change expand_binop to handle widening multiply?  */
      op0 = convert_to_mode (wider_mode, op0, unsignedp);

      /* We know that this can't have signed overflow, so pretend this is
         an unsigned multiply.  */
      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
			  build_int_2 (size, 0), NULL_RTX, 1);
      return convert_modes (mode, wider_mode, tem, unsignedp);
    }

which means that, on a 64-bit target, most SImode highpart multiplications
are handled by a DImode expand_mult.  It in turn uses add/shift sequences
if they are cheap, otherwise it falls back on muldi3.  Patterns like mulsidi3
and mulsi_highpart aren't considered.

It's a bit difficult to fix this with the current structure of the code,
so the patch creates the following new functions:

   - choose_mult_variant, split out from expand_mult, finds the cheapest
     way of multiplying by a constant.  It returns true if multiplying
     by a constant is cheaper than standard register multiplication
     in the same mode.

   - expand_mult_const, split out from expand_mult, implements the
     choice made by choose_mult_variant.

   - expand_mult_highpart_optab, split out from expand_mult_highpart,
     tries doing a highpart multiplication using optabs (ignoring
     shift/add sequences).

It's then possible for expand_mult_highpart to calculate the cost of
a wider_mode shift/add multiplication without actually expanding it.

There are a couple of other cleanups.  The code:

      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
			  build_int_2 (size, 0), NULL_RTX, 1);
      return convert_modes (mode, wider_mode, tem, unsignedp);

appeared several times, so I put it into its own function.  This avoids a
gratuitous goto in expand_mult_highpart and means that all callers benefit
from the gen_highpart optimisation that previously only appeared once.

Also, some parts of expand_mult_highpart had code like:

      target = expand_binop (.... target ...);
      if (target)
	return target;

so if the expansion failed, later code wouldn't be able to use
the caller's suggested target.  I changed this to:

      tem = expand_binop (.... target ...);
      if (tem)
	return tem;

As an example of the patch in action, mips64el-elf used to compile:

    int f (int x) { return x / 10000; }

as:

        li      $3,1759182848                   # 0x68db0000
        sll     $2,$4,0
        ori     $3,$3,0x8bad
        dmult   $2,$3
        sra     $4,$4,31
        mflo    $2
        dsra    $2,$2,32
        sra     $2,$2,12
        j       $31
        subu    $2,$2,$4

but now uses:

        li      $2,1759182848                   # 0x68db0000
        ori     $2,$2,0x8bad
        mult    $4,$2
        sra     $4,$4,31
        mfhi    $2
        sra     $2,$2,12
        j       $31
        subu    $2,$2,$4

Bootstrapped & regression tested on mips64{,el}-linux-gnu.  OK to install?

Richard


	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
	(expand_mult): ...here.
	(extract_high_half): New, split out from expand_mult_highpart.
	(expand_mult_highpart_optab): Likewise.  Don't clobber target prematurely.
	(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
	then see if any of the specialized optabs are cheaper.

Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.151
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.151 expmed.c
*** expmed.c	14 Mar 2004 22:26:05 -0000	1.151
--- expmed.c	18 Mar 2004 12:42:47 -0000
*************** struct algorithm
*** 2149,2159 ****
--- 2149,2172 ----
    char log[MAX_BITS_PER_WORD];
  };
  
+ /* Indicates the type of fixup needed after a constant multiplication.
+    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+    the result should be negated, and ADD_VARIANT means that the
+    multiplicand should be added to the result.  */
+ enum mult_variant {basic_variant, negate_variant, add_variant};
+ 
  static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+ 				 struct algorithm *, enum mult_variant *);
+ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+ 			      const struct algorithm *, enum mult_variant);
+ static rtx extract_high_half (enum machine_mode, rtx);
  static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
  						 int, unsigned HOST_WIDE_INT *,
  						 int *, int *);
  static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+ static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+ 				       int, int);
  /* Compute and return the best algorithm for multiplying by T.
     The algorithm must cost less than cost_limit
     If retval.cost >= COST_LIMIT, no algorithm was found and all
*************** synth_mult (struct algorithm *alg_out, u
*** 2396,2401 ****
--- 2409,2606 ----
  	  alg_out->ops * sizeof *alg_out->log);
  }
  
+ /* Find the cheapest way of multiplying a value of mode MODE by VAL.
+    Try three variations:
+ 
+        - a shift/add sequence based on VAL itself
+        - a shift/add sequence based on -VAL, followed by a negation
+        - a shift/add sequence based on VAL - 1, followed by an addition.
+ 
+    Return true if the cheapest of these is better than register
+    multiplication, describing the algorithm in *ALG and final
+    fixup in *VARIANT.  */
+ 
+ static bool
+ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+ 		     struct algorithm *alg, enum mult_variant *variant)
+ {
+   int mult_cost;
+   struct algorithm alg2;
+   rtx reg;
+ 
+   reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+   mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
+   mult_cost = MIN (12 * add_cost, mult_cost);
+ 
+   *variant = basic_variant;
+   synth_mult (alg, val, mult_cost);
+ 
+   /* This works only if the inverted value actually fits in an
+      `unsigned int' */
+   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+     {
+       synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+       alg2.cost += negate_cost;
+       if (alg2.cost < alg->cost)
+ 	*alg = alg2, *variant = negate_variant;
+     }
+ 
+   /* This proves very useful for division-by-constant.  */
+   synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+   alg2.cost += add_cost;
+   if (alg2.cost < alg->cost)
+     *alg = alg2, *variant = add_variant;
+ 
+   return alg->cost < mult_cost;
+ }
+ 
+ /* A subroutine of expand_mult, used for constant multiplications.
+    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+    convenient.  Use the shift/add sequence described by ALG and apply
+    the final fixup specified by VARIANT.  */
+ 
+ static rtx
+ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+ 		   rtx target, const struct algorithm *alg,
+ 		   enum mult_variant variant)
+ {
+   HOST_WIDE_INT val_so_far;
+   rtx insn, accum, tem;
+   int opno;
+   enum machine_mode nmode;
+ 
+   /* op0 must be register to make mult_cost match the precomputed
+      shiftadd_cost array.  */
+   op0 = protect_from_queue (op0, 0);
+ 
+   /* Avoid referencing memory over and over.
+      For speed, but also for correctness when mem is volatile.  */
+   if (GET_CODE (op0) == MEM)
+     op0 = force_reg (mode, op0);
+ 
+   /* ACCUM starts out either as OP0 or as a zero, depending on
+      the first operation.  */
+ 
+   if (alg->op[0] == alg_zero)
+     {
+       accum = copy_to_mode_reg (mode, const0_rtx);
+       val_so_far = 0;
+     }
+   else if (alg->op[0] == alg_m)
+     {
+       accum = copy_to_mode_reg (mode, op0);
+       val_so_far = 1;
+     }
+   else
+     abort ();
+ 
+   for (opno = 1; opno < alg->ops; opno++)
+     {
+       int log = alg->log[opno];
+       int preserve = preserve_subexpressions_p ();
+       rtx shift_subtarget = preserve ? 0 : accum;
+       rtx add_target
+ 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
+ 	   && ! preserve)
+ 	  ? target : 0;
+       rtx accum_target = preserve ? 0 : accum;
+ 
+       switch (alg->op[opno])
+ 	{
+ 	case alg_shift:
+ 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ 				build_int_2 (log, 0), NULL_RTX, 0);
+ 	  val_so_far <<= log;
+ 	  break;
+ 
+ 	case alg_add_t_m2:
+ 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ 			      build_int_2 (log, 0), NULL_RTX, 0);
+ 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+ 				 add_target ? add_target : accum_target);
+ 	  val_so_far += (HOST_WIDE_INT) 1 << log;
+ 	  break;
+ 
+ 	case alg_sub_t_m2:
+ 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ 			      build_int_2 (log, 0), NULL_RTX, 0);
+ 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+ 				 add_target ? add_target : accum_target);
+ 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
+ 	  break;
+ 
+ 	case alg_add_t2_m:
+ 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ 				build_int_2 (log, 0), shift_subtarget,
+ 				0);
+ 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+ 				 add_target ? add_target : accum_target);
+ 	  val_so_far = (val_so_far << log) + 1;
+ 	  break;
+ 
+ 	case alg_sub_t2_m:
+ 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ 				build_int_2 (log, 0), shift_subtarget, 0);
+ 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+ 				 add_target ? add_target : accum_target);
+ 	  val_so_far = (val_so_far << log) - 1;
+ 	  break;
+ 
+ 	case alg_add_factor:
+ 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ 			      build_int_2 (log, 0), NULL_RTX, 0);
+ 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+ 				 add_target ? add_target : accum_target);
+ 	  val_so_far += val_so_far << log;
+ 	  break;
+ 
+ 	case alg_sub_factor:
+ 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ 			      build_int_2 (log, 0), NULL_RTX, 0);
+ 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+ 				 (add_target ? add_target
+ 				  : preserve ? 0 : tem));
+ 	  val_so_far = (val_so_far << log) - val_so_far;
+ 	  break;
+ 
+ 	default:
+ 	  abort ();
+ 	}
+ 
+       /* Write a REG_EQUAL note on the last insn so that we can cse
+ 	 multiplication sequences.  Note that if ACCUM is a SUBREG,
+ 	 we've set the inner register and must properly indicate
+ 	 that.  */
+ 
+       tem = op0, nmode = mode;
+       if (GET_CODE (accum) == SUBREG)
+ 	{
+ 	  nmode = GET_MODE (SUBREG_REG (accum));
+ 	  tem = gen_lowpart (nmode, op0);
+ 	}
+ 
+       insn = get_last_insn ();
+       set_unique_reg_note (insn, REG_EQUAL,
+ 			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+     }
+ 
+   if (variant == negate_variant)
+     {
+       val_so_far = -val_so_far;
+       accum = expand_unop (mode, neg_optab, accum, target, 0);
+     }
+   else if (variant == add_variant)
+     {
+       val_so_far = val_so_far + 1;
+       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+     }
+ 
+   if (val != val_so_far)
+     abort ();
+ 
+   return accum;
+ }
+ 
  /* Perform a multiplication and return an rtx for the result.
     MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
     TARGET is a suggestion for where to store the result (an rtx).
*************** expand_mult (enum machine_mode mode, rtx
*** 2409,2414 ****
--- 2614,2621 ----
  	     int unsignedp)
  {
    rtx const_op1 = op1;
+   enum mult_variant variant;
+   struct algorithm algorithm;
  
    /* synth_mult does an `unsigned int' multiply.  As long as the mode is
       less than or equal in size to `unsigned int' this doesn't matter.
*************** expand_mult (enum machine_mode mode, rtx
*** 2435,2624 ****
       that it seems better to use synth_mult always.  */
  
    if (const_op1 && GET_CODE (const_op1) == CONST_INT
!       && (unsignedp || ! flag_trapv))
!     {
!       struct algorithm alg;
!       struct algorithm alg2;
!       HOST_WIDE_INT val = INTVAL (op1);
!       HOST_WIDE_INT val_so_far;
!       rtx insn;
!       int mult_cost;
!       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
! 
!       /* op0 must be register to make mult_cost match the precomputed
!          shiftadd_cost array.  */
!       op0 = force_reg (mode, op0);
! 
!       /* Try to do the computation three ways: multiply by the negative of OP1
! 	 and then negate, do the multiplication directly, or do multiplication
! 	 by OP1 - 1.  */
! 
!       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
!       mult_cost = MIN (12 * add_cost, mult_cost);
! 
!       synth_mult (&alg, val, mult_cost);
! 
!       /* This works only if the inverted value actually fits in an
! 	 `unsigned int' */
!       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
! 	{
! 	  synth_mult (&alg2, - val,
! 		      (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
! 	  if (alg2.cost + negate_cost < alg.cost)
! 	    alg = alg2, variant = negate_variant;
! 	}
! 
!       /* This proves very useful for division-by-constant.  */
!       synth_mult (&alg2, val - 1,
! 		  (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
!       if (alg2.cost + add_cost < alg.cost)
! 	alg = alg2, variant = add_variant;
! 
!       if (alg.cost < mult_cost)
! 	{
! 	  /* We found something cheaper than a multiply insn.  */
! 	  int opno;
! 	  rtx accum, tem;
! 	  enum machine_mode nmode;
! 
! 	  op0 = protect_from_queue (op0, 0);
! 
! 	  /* Avoid referencing memory over and over.
! 	     For speed, but also for correctness when mem is volatile.  */
! 	  if (GET_CODE (op0) == MEM)
! 	    op0 = force_reg (mode, op0);
! 
! 	  /* ACCUM starts out either as OP0 or as a zero, depending on
! 	     the first operation.  */
! 
! 	  if (alg.op[0] == alg_zero)
! 	    {
! 	      accum = copy_to_mode_reg (mode, const0_rtx);
! 	      val_so_far = 0;
! 	    }
! 	  else if (alg.op[0] == alg_m)
! 	    {
! 	      accum = copy_to_mode_reg (mode, op0);
! 	      val_so_far = 1;
! 	    }
! 	  else
! 	    abort ();
! 
! 	  for (opno = 1; opno < alg.ops; opno++)
! 	    {
! 	      int log = alg.log[opno];
! 	      int preserve = preserve_subexpressions_p ();
! 	      rtx shift_subtarget = preserve ? 0 : accum;
! 	      rtx add_target
! 		= (opno == alg.ops - 1 && target != 0 && variant != add_variant
! 		   && ! preserve)
! 		  ? target : 0;
! 	      rtx accum_target = preserve ? 0 : accum;
! 
! 	      switch (alg.op[opno])
! 		{
! 		case alg_shift:
! 		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
! 					build_int_2 (log, 0), NULL_RTX, 0);
! 		  val_so_far <<= log;
! 		  break;
! 
! 		case alg_add_t_m2:
! 		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
! 				      build_int_2 (log, 0), NULL_RTX, 0);
! 		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
! 					 add_target
! 					 ? add_target : accum_target);
! 		  val_so_far += (HOST_WIDE_INT) 1 << log;
! 		  break;
! 
! 		case alg_sub_t_m2:
! 		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
! 				      build_int_2 (log, 0), NULL_RTX, 0);
! 		  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
! 					 add_target
! 					 ? add_target : accum_target);
! 		  val_so_far -= (HOST_WIDE_INT) 1 << log;
! 		  break;
! 
! 		case alg_add_t2_m:
! 		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
! 					build_int_2 (log, 0), shift_subtarget,
! 					0);
! 		  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
! 					 add_target
! 					 ? add_target : accum_target);
! 		  val_so_far = (val_so_far << log) + 1;
! 		  break;
! 
! 		case alg_sub_t2_m:
! 		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
! 					build_int_2 (log, 0), shift_subtarget,
! 					0);
! 		  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
! 					 add_target
! 					 ? add_target : accum_target);
! 		  val_so_far = (val_so_far << log) - 1;
! 		  break;
! 
! 		case alg_add_factor:
! 		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
! 				      build_int_2 (log, 0), NULL_RTX, 0);
! 		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
! 					 add_target
! 					 ? add_target : accum_target);
! 		  val_so_far += val_so_far << log;
! 		  break;
! 
! 		case alg_sub_factor:
! 		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
! 				      build_int_2 (log, 0), NULL_RTX, 0);
! 		  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
! 					 (add_target ? add_target
! 					  : preserve ? 0 : tem));
! 		  val_so_far = (val_so_far << log) - val_so_far;
! 		  break;
! 
! 		default:
! 		  abort ();
! 		}
! 
! 	      /* Write a REG_EQUAL note on the last insn so that we can cse
! 		 multiplication sequences.  Note that if ACCUM is a SUBREG,
! 		 we've set the inner register and must properly indicate
! 		 that.  */
! 
! 	      tem = op0, nmode = mode;
! 	      if (GET_CODE (accum) == SUBREG)
! 		{
! 		  nmode = GET_MODE (SUBREG_REG (accum));
! 		  tem = gen_lowpart (nmode, op0);
! 		}
! 
! 	      insn = get_last_insn ();
! 	      set_unique_reg_note (insn,
! 				   REG_EQUAL,
! 				   gen_rtx_MULT (nmode, tem,
! 					         GEN_INT (val_so_far)));
! 	    }
! 
! 	  if (variant == negate_variant)
! 	    {
! 	      val_so_far = - val_so_far;
! 	      accum = expand_unop (mode, neg_optab, accum, target, 0);
! 	    }
! 	  else if (variant == add_variant)
! 	    {
! 	      val_so_far = val_so_far + 1;
! 	      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
! 	    }
! 
! 	  if (val != val_so_far)
! 	    abort ();
! 
! 	  return accum;
! 	}
!     }
  
    if (GET_CODE (op0) == CONST_DOUBLE)
      {
--- 2642,2651 ----
       that it seems better to use synth_mult always.  */
  
    if (const_op1 && GET_CODE (const_op1) == CONST_INT
!       && (unsignedp || !flag_trapv)
!       && choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
!     return expand_mult_const (mode, op0, INTVAL (const_op1), target,
! 			      &algorithm, variant);
  
    if (GET_CODE (op0) == CONST_DOUBLE)
      {
*************** expand_mult_highpart_adjust (enum machin
*** 2832,2901 ****
    return target;
  }
  
! /* Emit code to multiply OP0 and CNST1, putting the high half of the result
!    in TARGET if that is convenient, and return where the result is.  If the
!    operation can not be performed, 0 is returned.
  
!    MODE is the mode of operation and result.
  
!    UNSIGNEDP nonzero means unsigned multiply.
  
!    MAX_COST is the total allowed cost for the expanded RTL.  */
  
! rtx
! expand_mult_highpart (enum machine_mode mode, rtx op0,
! 		      unsigned HOST_WIDE_INT cnst1, rtx target,
! 		      int unsignedp, int max_cost)
  {
!   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
!   optab mul_highpart_optab;
    optab moptab;
    rtx tem;
!   int size = GET_MODE_BITSIZE (mode);
!   rtx op1, wide_op1;
! 
!   /* We can't support modes wider than HOST_BITS_PER_INT.  */
!   if (size > HOST_BITS_PER_WIDE_INT)
!     abort ();
  
!   op1 = gen_int_mode (cnst1, mode);
! 
!   wide_op1
!     = immed_double_const (cnst1,
! 			  (unsignedp
! 			   ? (HOST_WIDE_INT) 0
! 			   : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
! 			  wider_mode);
! 
!   /* expand_mult handles constant multiplication of word_mode
!      or narrower.  It does a poor job for large modes.  */
!   if (size < BITS_PER_WORD
!       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
!     {
!       /* We have to do this, since expand_binop doesn't do conversion for
! 	 multiply.  Maybe change expand_binop to handle widening multiply?  */
!       op0 = convert_to_mode (wider_mode, op0, unsignedp);
! 
!       /* We know that this can't have signed overflow, so pretend this is
!          an unsigned multiply.  */
!       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
!       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! 			  build_int_2 (size, 0), NULL_RTX, 1);
!       return convert_modes (mode, wider_mode, tem, unsignedp);
!     }
! 
!   if (target == 0)
!     target = gen_reg_rtx (mode);
  
    /* Firstly, try using a multiplication insn that only generates the needed
       high part of the product, and in the sign flavor of unsignedp.  */
    if (mul_highpart_cost[(int) mode] < max_cost)
      {
!       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
!       target = expand_binop (mode, mul_highpart_optab,
! 			     op0, op1, target, unsignedp, OPTAB_DIRECT);
!       if (target)
! 	return target;
      }
  
    /* Secondly, same as above, but use sign flavor opposite of unsignedp.
--- 2859,2904 ----
    return target;
  }
  
! /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
  
! static rtx
! extract_high_half (enum machine_mode mode, rtx op)
! {
!   enum machine_mode wider_mode;
  
!   if (mode == word_mode)
!     return gen_highpart (mode, op);
  
!   wider_mode = GET_MODE_WIDER_MODE (mode);
!   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
! 		     build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
!   return convert_modes (mode, wider_mode, op, 0);
! }
  
! /* Like expand_mult_highpart, but only consider using a multiplication
!    optab.  OP1 is an rtx for the constant operand.  */
! 
! static rtx
! expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
! 			    rtx target, int unsignedp, int max_cost)
  {
!   enum machine_mode wider_mode;
    optab moptab;
    rtx tem;
!   int size;
  
!   wider_mode = GET_MODE_WIDER_MODE (mode);
!   size = GET_MODE_BITSIZE (mode);
  
    /* Firstly, try using a multiplication insn that only generates the needed
       high part of the product, and in the sign flavor of unsignedp.  */
    if (mul_highpart_cost[(int) mode] < max_cost)
      {
!       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
!       tem = expand_binop (mode, moptab, op0, op1, target,
! 			  unsignedp, OPTAB_DIRECT);
!       if (tem)
! 	return tem;
      }
  
    /* Secondly, same as above, but use sign flavor opposite of unsignedp.
*************** expand_mult_highpart (enum machine_mode 
*** 2904,2916 ****
        && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
  	  < max_cost))
      {
!       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
!       target = expand_binop (mode, mul_highpart_optab,
! 			     op0, op1, target, unsignedp, OPTAB_DIRECT);
!       if (target)
  	/* We used the wrong signedness.  Adjust the result.  */
! 	return expand_mult_highpart_adjust (mode, target, op0,
! 					    op1, target, unsignedp);
      }
  
    /* Try widening multiplication.  */
--- 2907,2919 ----
        && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
  	  < max_cost))
      {
!       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
!       tem = expand_binop (mode, moptab, op0, op1, target,
! 			  unsignedp, OPTAB_DIRECT);
!       if (tem)
  	/* We used the wrong signedness.  Adjust the result.  */
! 	return expand_mult_highpart_adjust (mode, tem, op0, op1,
! 					    tem, unsignedp);
      }
  
    /* Try widening multiplication.  */
*************** expand_mult_highpart (enum machine_mode 
*** 2918,2925 ****
    if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
        && mul_widen_cost[(int) wider_mode] < max_cost)
      {
!       op1 = force_reg (mode, op1);
!       goto try;
      }
  
    /* Try widening the mode and perform a non-widening multiplication.  */
--- 2921,2930 ----
    if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
        && mul_widen_cost[(int) wider_mode] < max_cost)
      {
!       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
! 			  unsignedp, OPTAB_WIDEN);
!       if (tem)
! 	return extract_high_half (mode, tem);
      }
  
    /* Try widening the mode and perform a non-widening multiplication.  */
*************** expand_mult_highpart (enum machine_mode 
*** 2928,2935 ****
        && size - 1 < BITS_PER_WORD
        && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
      {
!       op1 = wide_op1;
!       goto try;
      }
  
    /* Try widening multiplication of opposite signedness, and adjust.  */
--- 2933,2942 ----
        && size - 1 < BITS_PER_WORD
        && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
      {
!       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
! 			  unsignedp, OPTAB_WIDEN);
!       if (tem)
! 	return extract_high_half (mode, tem);
      }
  
    /* Try widening multiplication of opposite signedness, and adjust.  */
*************** expand_mult_highpart (enum machine_mode 
*** 2944,2953 ****
  			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
        if (tem != 0)
  	{
! 	  /* Extract the high half of the just generated product.  */
! 	  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! 			      build_int_2 (size, 0), NULL_RTX, 1);
! 	  tem = convert_modes (mode, wider_mode, tem, unsignedp);
  	  /* We used the wrong signedness.  Adjust the result.  */
  	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
  					      target, unsignedp);
--- 2951,2957 ----
  			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
        if (tem != 0)
  	{
! 	  tem = extract_high_half (mode, tem);
  	  /* We used the wrong signedness.  Adjust the result.  */
  	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
  					      target, unsignedp);
*************** expand_mult_highpart (enum machine_mode 
*** 2955,2979 ****
      }
  
    return 0;
  
!  try:
!   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
!   tem = expand_binop (wider_mode, moptab, op0, op1,
! 		      NULL_RTX, unsignedp, OPTAB_WIDEN);
!   if (tem == 0)
!     return 0;
  
!   /* Extract the high half of the just generated product.  */
!   if (mode == word_mode)
!     {
!       return gen_highpart (mode, tem);
!     }
!   else
!     {
!       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! 			  build_int_2 (size, 0), NULL_RTX, 1);
!       return convert_modes (mode, wider_mode, tem, unsignedp);
      }
  }
  
  /* Emit the code to divide OP0 by OP1, putting the result in TARGET
--- 2959,3010 ----
      }
  
    return 0;
+ }
  
! /* Emit code to multiply OP0 and CNST1, putting the high half of the result
!    in TARGET if that is convenient, and return where the result is.  If the
!    operation can not be performed, 0 is returned.
  
!    MODE is the mode of operation and result.
! 
!    UNSIGNEDP nonzero means unsigned multiply.
! 
!    MAX_COST is the total allowed cost for the expanded RTL.  */
! 
! rtx
! expand_mult_highpart (enum machine_mode mode, rtx op0,
! 		      unsigned HOST_WIDE_INT cnst1, rtx target,
! 		      int unsignedp, int max_cost)
! {
!   enum machine_mode wider_mode;
!   enum mult_variant variant;
!   struct algorithm alg;
!   rtx op1, tem;
! 
!   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
!   if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
!     abort ();
! 
!   op1 = gen_int_mode (cnst1, mode);
! 
!   /* See whether shift/add multiplication is cheap enough.  */
!   if (choose_mult_variant (mode, cnst1, &alg, &variant)
!       && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
!     {
!       /* See whether the specialized multiplication optabs are
! 	 cheaper than the shift/add version.  */
!       tem = expand_mult_highpart_optab (mode, op0, op1, target,
! 					unsignedp, alg.cost);
!       if (tem)
! 	return tem;
! 
!       wider_mode = GET_MODE_WIDER_MODE (mode);
!       op0 = convert_to_mode (wider_mode, op0, unsignedp);
!       tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
!       return extract_high_half (mode, tem);
      }
+   return expand_mult_highpart_optab (mode, op0, op1, target,
+ 				     unsignedp, max_cost);
  }
  
  /* Emit the code to divide OP0 by OP1, putting the result in TARGET
Follow-Ups:
- Re: Bad choices by expand_mult_highpart
  - From: Richard Sandiford
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]