This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Bad choices by expand_mult_highpart
- From: Richard Sandiford <rsandifo at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 18 Mar 2004 12:51:09 +0000
- Subject: Bad choices by expand_mult_highpart
expand_mult_highpart tries several ways of multiplying by a constant.
Unfortunately, the first of these is:
/* expand_mult handles constant multiplication of word_mode
or narrower. It does a poor job for large modes. */
if (size < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
/* We have to do this, since expand_binop doesn't do conversion for
multiply. Maybe change expand_binop to handle widening multiply? */
op0 = convert_to_mode (wider_mode, op0, unsignedp);
/* We know that this can't have signed overflow, so pretend this is
an unsigned multiply. */
tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
build_int_2 (size, 0), NULL_RTX, 1);
return convert_modes (mode, wider_mode, tem, unsignedp);
}
which means that, on a 64-bit target, most SImode highpart multiplications
are handled by a DImode expand_mult. It in turn uses add/shift sequences
if they are cheap, otherwise it falls back on muldi3. Patterns like mulsidi3
and mulsi_highpart aren't considered.
It's a bit difficult to fix this with the current structure of the code,
so the patch creates the following new functions:
- choose_mult_variant, split out from expand_mult, finds the cheapest
way of multiplying by a constant. It returns true if multiplying
by a constant is cheaper than standard register multiplication
in the same mode.
- expand_mult_const, split out from expand_mult, implements the
choice made by choose_mult_variant.
- expand_mult_highpart_optab, split out from expand_mult_highpart,
tries doing a highpart multiplication using optabs (ignoring
shift/add sequences).
It's then possible for expand_mult_highpart to calculate the cost of
a wider_mode shift/add multiplication without actually expanding it.
There are a couple of other cleanups. The code:
tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
build_int_2 (size, 0), NULL_RTX, 1);
return convert_modes (mode, wider_mode, tem, unsignedp);
appeared several times, so I put it into its own function. This avoids a
gratuitous goto in expand_mult_highpart and means that all callers benefit
from the gen_highpart optimisation that previously only appeared once.
Also, some parts of expand_mult_highpart had code like:
target = expand_binop (.... target ...);
if (target)
return target;
so if the expansion failed, later code wouldn't be able to use
the caller's suggested target. I changed this to:
tem = expand_binop (.... target ...);
if (tem)
return tem;
As an example of the patch in action, mips64el-elf used to compile:
int f (int x) { return x / 10000; }
as:
li $3,1759182848 # 0x68db0000
sll $2,$4,0
ori $3,$3,0x8bad
dmult $2,$3
sra $4,$4,31
mflo $2
dsra $2,$2,32
sra $2,$2,12
j $31
subu $2,$2,$4
but now uses:
li $2,1759182848 # 0x68db0000
ori $2,$2,0x8bad
mult $4,$2
sra $4,$4,31
mfhi $2
sra $2,$2,12
j $31
subu $2,$2,$4
Bootstrapped & regression tested on mips64{,el}-linux-gnu. OK to install?
Richard
* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
(expand_mult): ...here.
(extract_high_half): New, split out from expand_mult_highpart.
(expand_mult_highpart_optab): Likewise. Don't clobber target prematurely.
(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
then see if any of the specialized optabs are cheaper.
Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.151
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.151 expmed.c
*** expmed.c 14 Mar 2004 22:26:05 -0000 1.151
--- expmed.c 18 Mar 2004 12:42:47 -0000
*************** struct algorithm
*** 2149,2159 ****
--- 2149,2172 ----
char log[MAX_BITS_PER_WORD];
};
+ /* Indicates the type of fixup needed after a constant multiplication.
+ BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+ the result should be negated, and ADD_VARIANT means that the
+ multiplicand should be added to the result. */
+ enum mult_variant {basic_variant, negate_variant, add_variant};
+
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+ struct algorithm *, enum mult_variant *);
+ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+ const struct algorithm *, enum mult_variant);
+ static rtx extract_high_half (enum machine_mode, rtx);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, unsigned HOST_WIDE_INT *,
int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+ static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+ int, int);
/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit
If retval.cost >= COST_LIMIT, no algorithm was found and all
*************** synth_mult (struct algorithm *alg_out, u
*** 2396,2401 ****
--- 2409,2606 ----
alg_out->ops * sizeof *alg_out->log);
}
+ /* Find the cheapest way of multiplying a value of mode MODE by VAL.
+ Try three variations:
+
+ - a shift/add sequence based on VAL itself
+ - a shift/add sequence based on -VAL, followed by a negation
+ - a shift/add sequence based on VAL - 1, followed by an addition.
+
+ The -VAL variation is only tried when MODE is no wider than
+ HOST_BITS_PER_INT. The cost of a register multiplication is taken
+ from rtx_cost and capped at 12 * add_cost.
+
+ Return true if the cheapest of these is better than register
+ multiplication, describing the algorithm in *ALG and final
+ fixup in *VARIANT. *VARIANT is set on every call. */
+
+ static bool
+ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+ struct algorithm *alg, enum mult_variant *variant)
+ {
+ int mult_cost;
+ struct algorithm alg2;
+ rtx reg;
+
+ reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+ mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
+ mult_cost = MIN (12 * add_cost, mult_cost);
+
+ *variant = basic_variant;
+ synth_mult (alg, val, mult_cost);
+
+ /* This works only if the inverted value actually fits in an
+ `unsigned int'. The limit passed to synth_mult is reduced by
+ negate_cost, which is added back to the result before comparing. */
+ if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+ {
+ synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+ alg2.cost += negate_cost;
+ if (alg2.cost < alg->cost)
+ *alg = alg2, *variant = negate_variant;
+ }
+
+ /* This proves very useful for division-by-constant. As above, the
+ limit is reduced by add_cost, which is added back afterwards. */
+ synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+ alg2.cost += add_cost;
+ if (alg2.cost < alg->cost)
+ *alg = alg2, *variant = add_variant;
+
+ return alg->cost < mult_cost;
+ }
+
+ /* A subroutine of expand_mult, used for constant multiplications.
+ Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+ convenient. Use the shift/add sequence described by ALG and apply
+ the final fixup specified by VARIANT.
+
+ The first operation in ALG must be alg_zero or alg_m. The value
+ built up by the sequence is tracked as it is emitted and the
+ function aborts if the final value differs from VAL. Return the
+ rtx holding the product. */
+
+ static rtx
+ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+ rtx target, const struct algorithm *alg,
+ enum mult_variant variant)
+ {
+ HOST_WIDE_INT val_so_far;
+ rtx insn, accum, tem;
+ int opno;
+ enum machine_mode nmode;
+
+ /* NOTE(review): this comment used to sit above a force_reg call in
+ expand_mult ("op0 must be register to make mult_cost match the
+ precomputed shiftadd_cost array"); only protect_from_queue remains
+ here, so confirm whether it is still accurate. */
+ op0 = protect_from_queue (op0, 0);
+
+ /* Avoid referencing memory over and over.
+ For speed, but also for correctness when mem is volatile. */
+ if (GET_CODE (op0) == MEM)
+ op0 = force_reg (mode, op0);
+
+ /* ACCUM starts out either as OP0 or as a zero, depending on
+ the first operation. VAL_SO_FAR records the constant that ACCUM
+ currently represents as a multiple of OP0. */
+
+ if (alg->op[0] == alg_zero)
+ {
+ accum = copy_to_mode_reg (mode, const0_rtx);
+ val_so_far = 0;
+ }
+ else if (alg->op[0] == alg_m)
+ {
+ accum = copy_to_mode_reg (mode, op0);
+ val_so_far = 1;
+ }
+ else
+ abort ();
+
+ for (opno = 1; opno < alg->ops; opno++)
+ {
+ int log = alg->log[opno];
+ int preserve = preserve_subexpressions_p ();
+ rtx shift_subtarget = preserve ? 0 : accum;
+ rtx add_target
+ = (opno == alg->ops - 1 && target != 0 && variant != add_variant
+ && ! preserve)
+ ? target : 0;
+ rtx accum_target = preserve ? 0 : accum;
+
+ switch (alg->op[opno])
+ {
+ case alg_shift:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ val_so_far <<= log;
+ break;
+
+ case alg_add_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+ add_target ? add_target : accum_target);
+ val_so_far += (HOST_WIDE_INT) 1 << log;
+ break;
+
+ case alg_sub_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+ add_target ? add_target : accum_target);
+ val_so_far -= (HOST_WIDE_INT) 1 << log;
+ break;
+
+ case alg_add_t2_m:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), shift_subtarget,
+ 0);
+ accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+ add_target ? add_target : accum_target);
+ val_so_far = (val_so_far << log) + 1;
+ break;
+
+ case alg_sub_t2_m:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), shift_subtarget, 0);
+ accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+ add_target ? add_target : accum_target);
+ val_so_far = (val_so_far << log) - 1;
+ break;
+
+ case alg_add_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+ add_target ? add_target : accum_target);
+ val_so_far += val_so_far << log;
+ break;
+
+ case alg_sub_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+ (add_target ? add_target
+ : preserve ? 0 : tem));
+ val_so_far = (val_so_far << log) - val_so_far;
+ break;
+
+ default:
+ abort ();
+ }
+
+ /* Write a REG_EQUAL note on the last insn so that we can cse
+ multiplication sequences. Note that if ACCUM is a SUBREG,
+ we've set the inner register and must properly indicate
+ that. */
+
+ tem = op0, nmode = mode;
+ if (GET_CODE (accum) == SUBREG)
+ {
+ nmode = GET_MODE (SUBREG_REG (accum));
+ tem = gen_lowpart (nmode, op0);
+ }
+
+ insn = get_last_insn ();
+ set_unique_reg_note (insn, REG_EQUAL,
+ gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+ }
+
+ if (variant == negate_variant)
+ {
+ val_so_far = -val_so_far;
+ accum = expand_unop (mode, neg_optab, accum, target, 0);
+ }
+ else if (variant == add_variant)
+ {
+ val_so_far = val_so_far + 1;
+ accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+ }
+
+ if (val != val_so_far)
+ abort ();
+
+ return accum;
+ }
+
/* Perform a multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
*************** expand_mult (enum machine_mode mode, rtx
*** 2409,2414 ****
--- 2614,2621 ----
int unsignedp)
{
rtx const_op1 = op1;
+ enum mult_variant variant;
+ struct algorithm algorithm;
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
*************** expand_mult (enum machine_mode mode, rtx
*** 2435,2624 ****
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
! && (unsignedp || ! flag_trapv))
! {
! struct algorithm alg;
! struct algorithm alg2;
! HOST_WIDE_INT val = INTVAL (op1);
! HOST_WIDE_INT val_so_far;
! rtx insn;
! int mult_cost;
! enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
!
! /* op0 must be register to make mult_cost match the precomputed
! shiftadd_cost array. */
! op0 = force_reg (mode, op0);
!
! /* Try to do the computation three ways: multiply by the negative of OP1
! and then negate, do the multiplication directly, or do multiplication
! by OP1 - 1. */
!
! mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
! mult_cost = MIN (12 * add_cost, mult_cost);
!
! synth_mult (&alg, val, mult_cost);
!
! /* This works only if the inverted value actually fits in an
! `unsigned int' */
! if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
! {
! synth_mult (&alg2, - val,
! (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
! if (alg2.cost + negate_cost < alg.cost)
! alg = alg2, variant = negate_variant;
! }
!
! /* This proves very useful for division-by-constant. */
! synth_mult (&alg2, val - 1,
! (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
! if (alg2.cost + add_cost < alg.cost)
! alg = alg2, variant = add_variant;
!
! if (alg.cost < mult_cost)
! {
! /* We found something cheaper than a multiply insn. */
! int opno;
! rtx accum, tem;
! enum machine_mode nmode;
!
! op0 = protect_from_queue (op0, 0);
!
! /* Avoid referencing memory over and over.
! For speed, but also for correctness when mem is volatile. */
! if (GET_CODE (op0) == MEM)
! op0 = force_reg (mode, op0);
!
! /* ACCUM starts out either as OP0 or as a zero, depending on
! the first operation. */
!
! if (alg.op[0] == alg_zero)
! {
! accum = copy_to_mode_reg (mode, const0_rtx);
! val_so_far = 0;
! }
! else if (alg.op[0] == alg_m)
! {
! accum = copy_to_mode_reg (mode, op0);
! val_so_far = 1;
! }
! else
! abort ();
!
! for (opno = 1; opno < alg.ops; opno++)
! {
! int log = alg.log[opno];
! int preserve = preserve_subexpressions_p ();
! rtx shift_subtarget = preserve ? 0 : accum;
! rtx add_target
! = (opno == alg.ops - 1 && target != 0 && variant != add_variant
! && ! preserve)
! ? target : 0;
! rtx accum_target = preserve ? 0 : accum;
!
! switch (alg.op[opno])
! {
! case alg_shift:
! accum = expand_shift (LSHIFT_EXPR, mode, accum,
! build_int_2 (log, 0), NULL_RTX, 0);
! val_so_far <<= log;
! break;
!
! case alg_add_t_m2:
! tem = expand_shift (LSHIFT_EXPR, mode, op0,
! build_int_2 (log, 0), NULL_RTX, 0);
! accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
! add_target
! ? add_target : accum_target);
! val_so_far += (HOST_WIDE_INT) 1 << log;
! break;
!
! case alg_sub_t_m2:
! tem = expand_shift (LSHIFT_EXPR, mode, op0,
! build_int_2 (log, 0), NULL_RTX, 0);
! accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
! add_target
! ? add_target : accum_target);
! val_so_far -= (HOST_WIDE_INT) 1 << log;
! break;
!
! case alg_add_t2_m:
! accum = expand_shift (LSHIFT_EXPR, mode, accum,
! build_int_2 (log, 0), shift_subtarget,
! 0);
! accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
! add_target
! ? add_target : accum_target);
! val_so_far = (val_so_far << log) + 1;
! break;
!
! case alg_sub_t2_m:
! accum = expand_shift (LSHIFT_EXPR, mode, accum,
! build_int_2 (log, 0), shift_subtarget,
! 0);
! accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
! add_target
! ? add_target : accum_target);
! val_so_far = (val_so_far << log) - 1;
! break;
!
! case alg_add_factor:
! tem = expand_shift (LSHIFT_EXPR, mode, accum,
! build_int_2 (log, 0), NULL_RTX, 0);
! accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
! add_target
! ? add_target : accum_target);
! val_so_far += val_so_far << log;
! break;
!
! case alg_sub_factor:
! tem = expand_shift (LSHIFT_EXPR, mode, accum,
! build_int_2 (log, 0), NULL_RTX, 0);
! accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
! (add_target ? add_target
! : preserve ? 0 : tem));
! val_so_far = (val_so_far << log) - val_so_far;
! break;
!
! default:
! abort ();
! }
!
! /* Write a REG_EQUAL note on the last insn so that we can cse
! multiplication sequences. Note that if ACCUM is a SUBREG,
! we've set the inner register and must properly indicate
! that. */
!
! tem = op0, nmode = mode;
! if (GET_CODE (accum) == SUBREG)
! {
! nmode = GET_MODE (SUBREG_REG (accum));
! tem = gen_lowpart (nmode, op0);
! }
!
! insn = get_last_insn ();
! set_unique_reg_note (insn,
! REG_EQUAL,
! gen_rtx_MULT (nmode, tem,
! GEN_INT (val_so_far)));
! }
!
! if (variant == negate_variant)
! {
! val_so_far = - val_so_far;
! accum = expand_unop (mode, neg_optab, accum, target, 0);
! }
! else if (variant == add_variant)
! {
! val_so_far = val_so_far + 1;
! accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
! }
!
! if (val != val_so_far)
! abort ();
!
! return accum;
! }
! }
if (GET_CODE (op0) == CONST_DOUBLE)
{
--- 2642,2651 ----
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
! && (unsignedp || !flag_trapv)
! && choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
! return expand_mult_const (mode, op0, INTVAL (const_op1), target,
! &algorithm, variant);
if (GET_CODE (op0) == CONST_DOUBLE)
{
*************** expand_mult_highpart_adjust (enum machin
*** 2832,2901 ****
return target;
}
! /* Emit code to multiply OP0 and CNST1, putting the high half of the result
! in TARGET if that is convenient, and return where the result is. If the
! operation can not be performed, 0 is returned.
! MODE is the mode of operation and result.
! UNSIGNEDP nonzero means unsigned multiply.
! MAX_COST is the total allowed cost for the expanded RTL. */
! rtx
! expand_mult_highpart (enum machine_mode mode, rtx op0,
! unsigned HOST_WIDE_INT cnst1, rtx target,
! int unsignedp, int max_cost)
{
! enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
! optab mul_highpart_optab;
optab moptab;
rtx tem;
! int size = GET_MODE_BITSIZE (mode);
! rtx op1, wide_op1;
!
! /* We can't support modes wider than HOST_BITS_PER_INT. */
! if (size > HOST_BITS_PER_WIDE_INT)
! abort ();
! op1 = gen_int_mode (cnst1, mode);
!
! wide_op1
! = immed_double_const (cnst1,
! (unsignedp
! ? (HOST_WIDE_INT) 0
! : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
! wider_mode);
!
! /* expand_mult handles constant multiplication of word_mode
! or narrower. It does a poor job for large modes. */
! if (size < BITS_PER_WORD
! && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
! {
! /* We have to do this, since expand_binop doesn't do conversion for
! multiply. Maybe change expand_binop to handle widening multiply? */
! op0 = convert_to_mode (wider_mode, op0, unsignedp);
!
! /* We know that this can't have signed overflow, so pretend this is
! an unsigned multiply. */
! tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
! tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! build_int_2 (size, 0), NULL_RTX, 1);
! return convert_modes (mode, wider_mode, tem, unsignedp);
! }
!
! if (target == 0)
! target = gen_reg_rtx (mode);
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
{
! mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
! target = expand_binop (mode, mul_highpart_optab,
! op0, op1, target, unsignedp, OPTAB_DIRECT);
! if (target)
! return target;
}
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
--- 2859,2904 ----
return target;
}
! /* Subroutine of expand_mult_highpart. Return the MODE high part of OP,
! where OP is expected to be a value in MODE's wider mode. When MODE
! is word_mode the result comes from gen_highpart; otherwise OP is
! shifted right by the bit size of MODE in the wider mode and then
! converted back to MODE. */
! static rtx
! extract_high_half (enum machine_mode mode, rtx op)
! {
! enum machine_mode wider_mode;
! if (mode == word_mode)
! return gen_highpart (mode, op);
! wider_mode = GET_MODE_WIDER_MODE (mode);
! op = expand_shift (RSHIFT_EXPR, wider_mode, op,
! build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
! return convert_modes (mode, wider_mode, op, 0);
! }
! /* Like expand_mult_highpart, but only consider using a multiplication
! optab. OP1 is an rtx for the constant operand. */
!
! static rtx
! expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
! rtx target, int unsignedp, int max_cost)
{
! enum machine_mode wider_mode;
optab moptab;
rtx tem;
! int size;
! wider_mode = GET_MODE_WIDER_MODE (mode);
! size = GET_MODE_BITSIZE (mode);
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
{
! moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
! tem = expand_binop (mode, moptab, op0, op1, target,
! unsignedp, OPTAB_DIRECT);
! if (tem)
! return tem;
}
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
*************** expand_mult_highpart (enum machine_mode
*** 2904,2916 ****
&& (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
< max_cost))
{
! mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
! target = expand_binop (mode, mul_highpart_optab,
! op0, op1, target, unsignedp, OPTAB_DIRECT);
! if (target)
/* We used the wrong signedness. Adjust the result. */
! return expand_mult_highpart_adjust (mode, target, op0,
! op1, target, unsignedp);
}
/* Try widening multiplication. */
--- 2907,2919 ----
&& (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
< max_cost))
{
! moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
! tem = expand_binop (mode, moptab, op0, op1, target,
! unsignedp, OPTAB_DIRECT);
! if (tem)
/* We used the wrong signedness. Adjust the result. */
! return expand_mult_highpart_adjust (mode, tem, op0, op1,
! tem, unsignedp);
}
/* Try widening multiplication. */
*************** expand_mult_highpart (enum machine_mode
*** 2918,2925 ****
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[(int) wider_mode] < max_cost)
{
! op1 = force_reg (mode, op1);
! goto try;
}
/* Try widening the mode and perform a non-widening multiplication. */
--- 2921,2930 ----
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[(int) wider_mode] < max_cost)
{
! tem = expand_binop (wider_mode, moptab, op0, op1, 0,
! unsignedp, OPTAB_WIDEN);
! /* A zero TEM means the widening multiply could not be expanded.
! Only a successful product has a high half to extract; on
! failure fall through to the next strategy. */
! if (tem)
! return extract_high_half (mode, tem);
}
/* Try widening the mode and perform a non-widening multiplication. */
*************** expand_mult_highpart (enum machine_mode
*** 2928,2935 ****
&& size - 1 < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
! op1 = wide_op1;
! goto try;
}
/* Try widening multiplication of opposite signedness, and adjust. */
--- 2933,2942 ----
&& size - 1 < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
! tem = expand_binop (wider_mode, moptab, op0, op1, 0,
! unsignedp, OPTAB_WIDEN);
! /* A zero TEM means the wider-mode multiply could not be expanded.
! Only a successful product has a high half to extract; on
! failure fall through to the next strategy. */
! if (tem)
! return extract_high_half (mode, tem);
}
/* Try widening multiplication of opposite signedness, and adjust. */
*************** expand_mult_highpart (enum machine_mode
*** 2944,2953 ****
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
! /* Extract the high half of the just generated product. */
! tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! build_int_2 (size, 0), NULL_RTX, 1);
! tem = convert_modes (mode, wider_mode, tem, unsignedp);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, op1,
target, unsignedp);
--- 2951,2957 ----
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
! tem = extract_high_half (mode, tem);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, op1,
target, unsignedp);
*************** expand_mult_highpart (enum machine_mode
*** 2955,2979 ****
}
return 0;
! try:
! /* Pass NULL_RTX as target since TARGET has wrong mode. */
! tem = expand_binop (wider_mode, moptab, op0, op1,
! NULL_RTX, unsignedp, OPTAB_WIDEN);
! if (tem == 0)
! return 0;
! /* Extract the high half of the just generated product. */
! if (mode == word_mode)
! {
! return gen_highpart (mode, tem);
! }
! else
! {
! tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
! build_int_2 (size, 0), NULL_RTX, 1);
! return convert_modes (mode, wider_mode, tem, unsignedp);
}
}
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
--- 2959,3010 ----
}
return 0;
+ }
! /* Emit code to multiply OP0 and CNST1, putting the high half of the result
! in TARGET if that is convenient, and return where the result is. If the
! operation can not be performed, 0 is returned.
! MODE is the mode of operation and result.
!
! UNSIGNEDP nonzero means unsigned multiply.
!
! MAX_COST is the total allowed cost for the expanded RTL.
!
! A shift/add sequence is costed first; if it fits within MAX_COST the
! multiplication optabs are only used when they beat that sequence,
! otherwise the optabs are tried against MAX_COST directly. */
!
! rtx
! expand_mult_highpart (enum machine_mode mode, rtx op0,
! unsigned HOST_WIDE_INT cnst1, rtx target,
! int unsignedp, int max_cost)
! {
! enum machine_mode wider_mode;
! enum mult_variant variant;
! struct algorithm alg;
! rtx op1, tem;
!
! /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
! if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
! abort ();
!
! op1 = gen_int_mode (cnst1, mode);
!
! /* See whether shift/add multiplication is cheap enough. The shift
! that extracts the high half is charged to alg.cost as well.
! NOTE(review): the variant is chosen for MODE but the sequence is
! expanded in WIDER_MODE below -- confirm that costing the narrower
! mode is intended. */
! if (choose_mult_variant (mode, cnst1, &alg, &variant)
! && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
! {
! /* See whether the specialized multiplication optabs are
! cheaper than the shift/add version. */
! tem = expand_mult_highpart_optab (mode, op0, op1, target,
! unsignedp, alg.cost);
! if (tem)
! return tem;
!
! wider_mode = GET_MODE_WIDER_MODE (mode);
! op0 = convert_to_mode (wider_mode, op0, unsignedp);
! tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
! return extract_high_half (mode, tem);
}
+ return expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, max_cost);
}
/* Emit the code to divide OP0 by OP1, putting the result in TARGET