This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Bad choices by expand_mult_highpart


Roger Sayle wrote:

> > > 	* expmed.c (choose_mult_variant): Pass MULT_COST as argument instead
> > > 	of using register multiplication cost.
> > > 	(expand_mult): Adapt choose_mult_variant call.
> > > 	(expand_mult_highpart): Call choose_mult_variant with WIDER_MODE
> > > 	instead of MODE; pass appropriate cost bound.  Adjust result when
> > > 	performing signed multiplication by a negative constant.
> 
> Approved.

Unfortunately it failed bootstrap on s390 in this form.  There are two
additional problems: if the mode size is smaller than HOST_BITS_PER_WIDE_INT,
we need to clear the high bits in the integer constant before passing it
to choose_mult_variant, and we need to make sure that we don't use a too
wide intermediate mode.

On 32-bit s390, the current code tries to optimize a DImode division 
by performing a *TImode* multipliation using TImode shifts and adds,
which can't be expanded on a 32-bit target.

So we have to ensure that mode is at most word_mode, so that the shifts
and adds are done at most in a double-word mode.  I've tested this,
and it passed bootstrap/regtest on s390/s390x.  However, I guess this
is still not quite correct, since the generated code is really bad.
The problem is that all the adds and shifts are accounted into the
cost calculations by using the *single-word* operation costs.  Since
double-word operations tend to be significantly more expensive, this
makes the calculations completely off ...

I've now restricted even *wider_mode* to at most word_mode.  This should
still allow the case Richard is concerned about (SImode divisions on
64-bit targets), and avoid generating huge amounts of expensive double-
word operations.  The final patch is below; bootstrap/regtest on 
s390-ibm-linux and s390x-ibm-linux is currently running.

OK to commit if the tests pass?

> Ideally, it would be nice to add some execution testcases to the GCC
> testsuite to see if we can catch this failure in future.  Is there a
> real problem with testsuite coverage or is the erroneous code just
> ever triggered on the platforms Richard's patch was test on?

I think one problem was that due to the mode restrictions the whole
division-by-multiplication scenarion triggered only relatively rarely
(e.g. short division on 32-bit targets).  As those restrictions were
lifted by Richard's patch (and in fact are currently much too generous,
see above), the code triggers much more frequently ...

Bye,
Ulrich

ChangeLog:

	* expmed.c (choose_mult_variant): Pass MULT_COST as argument instead
	of using register multiplication cost.
	(expand_mult): Adapt choose_mult_variant call.
	(expand_mult_highpart): Call choose_mult_variant with WIDER_MODE
	of MODE; pass appropriate cost bound.  Adjust result when
	performing signed multiplication by a negative constant.
	Don't use intermediate modes larger than word_mode.


Index: gcc/expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.152
diff -c -p -r1.152 expmed.c
*** gcc/expmed.c	19 Mar 2004 09:59:00 -0000	1.152
--- gcc/expmed.c	21 Mar 2004 16:29:27 -0000
*************** enum mult_variant {basic_variant, negate
*** 2157,2163 ****
  
  static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
  static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
! 				 struct algorithm *, enum mult_variant *);
  static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
  			      const struct algorithm *, enum mult_variant);
  static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
--- 2157,2163 ----
  
  static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
  static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
! 				 struct algorithm *, enum mult_variant *, int);
  static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
  			      const struct algorithm *, enum mult_variant);
  static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
*************** synth_mult (struct algorithm *alg_out, u
*** 2416,2436 ****
         - a shift/add sequence based on -VAL, followed by a negation
         - a shift/add sequence based on VAL - 1, followed by an addition.
  
!    Return true if the cheapest of these is better than register
!    multiplication, describing the algorithm in *ALG and final
!    fixup in *VARIANT.  */
  
  static bool
  choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
! 		     struct algorithm *alg, enum mult_variant *variant)
  {
-   int mult_cost;
    struct algorithm alg2;
-   rtx reg;
- 
-   reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
-   mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
-   mult_cost = MIN (12 * add_cost, mult_cost);
  
    *variant = basic_variant;
    synth_mult (alg, val, mult_cost);
--- 2416,2430 ----
         - a shift/add sequence based on -VAL, followed by a negation
         - a shift/add sequence based on VAL - 1, followed by an addition.
  
!    Return true if the cheapest of these cost less than MULT_COST,
!    describing the algorithm in *ALG and final fixup in *VARIANT.  */
  
  static bool
  choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
! 		     struct algorithm *alg, enum mult_variant *variant,
! 		     int mult_cost)
  {
    struct algorithm alg2;
  
    *variant = basic_variant;
    synth_mult (alg, val, mult_cost);
*************** expand_mult (enum machine_mode mode, rtx
*** 2642,2651 ****
       that it seems better to use synth_mult always.  */
  
    if (const_op1 && GET_CODE (const_op1) == CONST_INT
!       && (unsignedp || !flag_trapv)
!       && choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
!     return expand_mult_const (mode, op0, INTVAL (const_op1), target,
! 			      &algorithm, variant);
  
    if (GET_CODE (op0) == CONST_DOUBLE)
      {
--- 2636,2651 ----
       that it seems better to use synth_mult always.  */
  
    if (const_op1 && GET_CODE (const_op1) == CONST_INT
!       && (unsignedp || !flag_trapv))
!     {
!       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
!       mult_cost = MIN (12 * add_cost, mult_cost);
! 
!       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
! 			       mult_cost))
! 	return expand_mult_const (mode, op0, INTVAL (const_op1), target,
! 				  &algorithm, variant);
!     }
  
    if (GET_CODE (op0) == CONST_DOUBLE)
      {
*************** expand_mult_highpart (enum machine_mode 
*** 2976,2982 ****
  		      unsigned HOST_WIDE_INT cnst1, rtx target,
  		      int unsignedp, int max_cost)
  {
!   enum machine_mode wider_mode;
    enum mult_variant variant;
    struct algorithm alg;
    rtx op1, tem;
--- 2976,2984 ----
  		      unsigned HOST_WIDE_INT cnst1, rtx target,
  		      int unsignedp, int max_cost)
  {
!   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
!   int extra_cost;
!   bool sign_adjust = false;
    enum mult_variant variant;
    struct algorithm alg;
    rtx op1, tem;
*************** expand_mult_highpart (enum machine_mode 
*** 2986,3007 ****
      abort ();
  
    op1 = gen_int_mode (cnst1, mode);
  
    /* See whether shift/add multiplication is cheap enough.  */
!   if (choose_mult_variant (mode, cnst1, &alg, &variant)
!       && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
      {
        /* See whether the specialized multiplication optabs are
  	 cheaper than the shift/add version.  */
        tem = expand_mult_highpart_optab (mode, op0, op1, target,
! 					unsignedp, alg.cost);
        if (tem)
  	return tem;
  
!       wider_mode = GET_MODE_WIDER_MODE (mode);
!       op0 = convert_to_mode (wider_mode, op0, unsignedp);
!       tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
!       return extract_high_half (mode, tem);
      }
    return expand_mult_highpart_optab (mode, op0, op1, target,
  				     unsignedp, max_cost);
--- 2988,3032 ----
      abort ();
  
    op1 = gen_int_mode (cnst1, mode);
+   cnst1 &= GET_MODE_MASK (mode);
+ 
+   /* We can't optimize modes wider than BITS_PER_WORD. 
+      ??? We might be able to perform double-word arithmetic if 
+      mode == word_mode, however all the cost calculations in
+      synth_mult etc. assume single-word operations.  */
+   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
+     return expand_mult_highpart_optab (mode, op0, op1, target,
+ 				       unsignedp, max_cost);
+ 
+   extra_cost = shift_cost[GET_MODE_BITSIZE (mode) - 1];
+ 
+   /* Check whether we try to multiply by a negative constant.  */
+   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
+     {
+       sign_adjust = true;
+       extra_cost += add_cost;
+     }
  
    /* See whether shift/add multiplication is cheap enough.  */
!   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
! 			   max_cost - extra_cost))
      {
        /* See whether the specialized multiplication optabs are
  	 cheaper than the shift/add version.  */
        tem = expand_mult_highpart_optab (mode, op0, op1, target,
! 					unsignedp, alg.cost + extra_cost);
        if (tem)
  	return tem;
  
!       tem = convert_to_mode (wider_mode, op0, unsignedp);
!       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
!       tem = extract_high_half (mode, tem);
! 
!       /* Adjust result for signedness. */
!       if (sign_adjust)
! 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
! 
!       return tem;
      }
    return expand_mult_highpart_optab (mode, op0, op1, target,
  				     unsignedp, max_cost);

-- 
  Dr. Ulrich Weigand
  weigand@informatik.uni-erlangen.de


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]