This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Patch: Consistently generate widening multiplies


Tree-SSA introduced a regression with widening multiplications: the
extension operations are often separated from the MULT_EXPR they feed.
A patch last year tried to address this in expand_expr_real_2, but it
is still rather more miss than hit, as the testcases included in the
patch below show.
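
To illustrate with a minimal example (not one of the new testcases):
for

  long long f (int a, int b)
  {
    return (long long) a * (long long) b;
  }

the gimplifier emits the extensions as separate statements, roughly

  a.0 = (long long int) a;
  b.1 = (long long int) b;
  result = a.0 * b.1;

so by the time expand_expr_real_2 sees the MULT_EXPR, the operands are
just SSA names, and the fact that both were widened from int is easily
missed.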

This patch adds a new mini-pass to tree-ssa-math-opts.c.  It looks for
multiplications, checks that the operands are suitable casts, and
replaces the MULT_EXPR with a WIDEN_MULT_EXPR of the unextended
operands.  That tree code was already added by a previous patch, but
apparently isn't used by anything yet.  I've tweaked its documentation,
implemented the missing RTL expansion code, and removed support for
widening multiplies from MULT_EXPR expansion.  There is now also code
that compares the cost of a widening multiply by a constant against a
synth_mult shift-and-add sequence and chooses the cheaper of the two.
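
In before/after terms, the rewrite the new pass performs looks like
this (a GIMPLE sketch; SSA version numbers omitted):

  /* Before: extensions feeding a full-width MULT_EXPR.  */
  t1 = (long long int) a;
  t2 = (long long int) b;
  prod = t1 * t2;

  /* After: one WIDEN_MULT_EXPR on the narrow operands
     (printed as "w*" in the GIMPLE dumps).  */
  prod = a w* b;

The same applies to the mixed case where one operand is an
INTEGER_CST, provided the constant fits the narrower type.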

Bootstrapped & tested on i686-linux.  Ok (and for which gcc version)?
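
(To inspect the intermediate result: the pass registers under the name
"widening_mul", so by the usual -fdump-tree-<passname> convention
something like

  $ gcc -O2 -fdump-tree-widening_mul -S wmul-1.c

should leave a wmul-1.c.*.widening_mul dump showing the multiplies
rewritten as above.)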


Bernd
	* optabs.h (expand_widening_mult): Declare.
	* tree-pass.h (pass_optimize_widening_mul): Declare.
	* tree-ssa-math-opts.c (execute_optimize_widening_mul,
	gate_optimize_widening_mul): New static functions.
	(pass_optimize_widening_mul): New.
	* expr.c (expand_expr_real_2) <case WIDEN_MULT_EXPR>: New
	case.
	<case MULT_EXPR>: Remove support for widening multiplies.
	* tree.def (WIDEN_MULT_EXPR): Tweak comment.
	* expmed.c (expand_widening_mult): New function.
	* passes.c (init_optimization_passes): Add pass_optimize_widening_mul.

testsuite/
	* gcc.target/i386/wmul-1.c: New test.
	* gcc.target/i386/wmul-2.c: New test.
	* gcc.target/bfin/wmul-1.c: New test.
	* gcc.target/bfin/wmul-2.c: New test.

Index: optabs.h
===================================================================
--- optabs.h	(revision 156770)
+++ optabs.h	(working copy)
@@ -771,6 +771,9 @@ extern void expand_fix (rtx, rtx, int);
 /* Generate code for float to integral conversion.  */
 extern bool expand_sfix_optab (rtx, rtx, convert_optab);
 
+/* Generate code for a widening multiply.  */
+extern rtx expand_widening_mult (enum machine_mode, rtx, rtx, rtx, int, optab);
+
 /* Return tree if target supports vector operations for COND_EXPR.  */
 bool expand_vec_cond_expr_p (tree, enum machine_mode);
 
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 156770)
+++ tree-pass.h	(working copy)
@@ -407,6 +407,7 @@ extern struct gimple_opt_pass pass_late_
 extern struct gimple_opt_pass pass_cse_reciprocals;
 extern struct gimple_opt_pass pass_cse_sincos;
 extern struct gimple_opt_pass pass_optimize_bswap;
+extern struct gimple_opt_pass pass_optimize_widening_mul;
 extern struct gimple_opt_pass pass_warn_function_return;
 extern struct gimple_opt_pass pass_warn_function_noreturn;
 extern struct gimple_opt_pass pass_cselim;
Index: testsuite/gcc.target/i386/wmul-2.c
===================================================================
--- testsuite/gcc.target/i386/wmul-2.c	(revision 0)
+++ testsuite/gcc.target/i386/wmul-2.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void vec_mpy(int y[], const int x[], int scaler)
+{
+ int i;
+
+ for (i = 0; i < 150; i++)
+   y[i] += (((long long)scaler * x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-times "imull" 1 } } */
Index: testsuite/gcc.target/i386/wmul-1.c
===================================================================
--- testsuite/gcc.target/i386/wmul-1.c	(revision 0)
+++ testsuite/gcc.target/i386/wmul-1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long mac(const int *a, const int *b, long long sqr, long long *sum)
+{
+  int i;
+  long long dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += (long long)b[i] * (long long)a[i];
+    sqr += (long long)b[i] * (long long)b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "imull" 2 } } */
Index: testsuite/gcc.target/bfin/wmul-1.c
===================================================================
--- testsuite/gcc.target/bfin/wmul-1.c	(revision 0)
+++ testsuite/gcc.target/bfin/wmul-1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int mac(const short *a, const short *b, int sqr, int *sum)
+{
+  int i;
+  int dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += b[i] * a[i];
+    sqr += b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "\\(IS\\)" 2 } } */
Index: tree-ssa-math-opts.c
===================================================================
--- tree-ssa-math-opts.c	(revision 156770)
+++ tree-ssa-math-opts.c	(working copy)
@@ -1256,3 +1256,144 @@ struct gimple_opt_pass pass_optimize_bsw
   0                                     /* todo_flags_finish */
  }
 };
+
+/* Find integer multiplications where the operands are extended from
+   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
+   where appropriate.  */
+
+static unsigned int
+execute_optimize_widening_mul (void)
+{
+  bool changed = false;
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      gimple_stmt_iterator gsi;
+
+      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+	  gimple stmt = gsi_stmt (gsi);
+	  gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
+	  tree type, type1, type2;
+	  tree rhs1, rhs2, rhs1_convop, rhs2_convop;
+	  enum tree_code rhs1_code, rhs2_code;
+
+	  if (!is_gimple_assign (stmt)
+	      || gimple_assign_rhs_code (stmt) != MULT_EXPR)
+	    continue;
+
+	  type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+	  if (TREE_CODE (type) != INTEGER_TYPE)
+	    continue;
+
+	  rhs1 = gimple_assign_rhs1 (stmt);
+	  rhs2 = gimple_assign_rhs2 (stmt);
+
+	  if (TREE_CODE (rhs1) == SSA_NAME)
+	    {
+	      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+	      if (!is_gimple_assign (rhs1_stmt))
+		continue;
+	      rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+	      if (rhs1_code != CONVERT_EXPR && rhs1_code != NOP_EXPR)
+		continue;
+	      rhs1_convop = gimple_assign_rhs1 (rhs1_stmt);
+	      type1 = TREE_TYPE (rhs1_convop);
+	      if (TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+		continue;
+	    }
+	  else if (TREE_CODE (rhs1) != INTEGER_CST)
+	    continue;
+
+	  if (TREE_CODE (rhs2) == SSA_NAME)
+	    {
+	      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+	      if (!is_gimple_assign (rhs2_stmt))
+		continue;
+	      rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+	      if (rhs2_code != CONVERT_EXPR && rhs2_code != NOP_EXPR)
+		continue;
+	      rhs2_convop = gimple_assign_rhs1 (rhs2_stmt);
+	      type2 = TREE_TYPE (rhs2_convop);
+	      if (TYPE_PRECISION (type2) * 2 != TYPE_PRECISION (type))
+		continue;
+	    }
+	  else if (TREE_CODE (rhs2) != INTEGER_CST)
+	    continue;
+
+	  if (rhs1_stmt == NULL && rhs2_stmt == NULL)
+	    continue;
+
+	  if ((rhs1_stmt == NULL && !int_fits_type_p (rhs1, type2))
+	      || (rhs2_stmt == NULL && !int_fits_type_p (rhs2, type1)))
+	    continue;
+
+	  if (rhs1_stmt != NULL && gimple_bb (rhs1_stmt) != gimple_bb (stmt))
+	    {
+	      gimple_stmt_iterator other_gsi = gsi_for_stmt (rhs1_stmt);
+	      gimple tmp_stmt;
+	      tree tmpvar1;
+
+	      tmpvar1 = create_tmp_var (TREE_TYPE (rhs1_convop), "wmulsrc");
+	      add_referenced_var (tmpvar1);
+	      tmpvar1 = make_ssa_name (tmpvar1, NULL);
+
+	      tmp_stmt = gimple_build_assign (tmpvar1, rhs1_convop);
+	      gsi_insert_before (&other_gsi, tmp_stmt, GSI_SAME_STMT);
+	      rhs1_convop = tmpvar1;
+	    }
+
+	  if (rhs2_stmt != NULL && gimple_bb (rhs2_stmt) != gimple_bb (stmt))
+	    {
+	      gimple_stmt_iterator other_gsi = gsi_for_stmt (rhs2_stmt);
+	      gimple tmp_stmt;
+	      tree tmpvar1;
+
+	      tmpvar1 = create_tmp_var (TREE_TYPE (rhs2_convop), "wmulsrc");
+	      add_referenced_var (tmpvar1);
+	      tmpvar1 = make_ssa_name (tmpvar1, NULL);
+
+	      tmp_stmt = gimple_build_assign (tmpvar1, rhs2_convop);
+	      gsi_insert_before (&other_gsi, tmp_stmt, GSI_SAME_STMT);
+	      rhs2_convop = tmpvar1;
+	    }
+
+	  if (rhs1_stmt != NULL)
+	    gimple_assign_set_rhs1 (stmt, rhs1_convop);
+	  if (rhs2_stmt != NULL)
+	    gimple_assign_set_rhs2 (stmt, rhs2_convop);
+	  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
+	  update_stmt (stmt);
+	  changed = true;
+	}
+    }
+  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
+	  | TODO_verify_stmts : 0);
+}
+
+static bool
+gate_optimize_widening_mul (void)
+{
+  return flag_expensive_optimizations && optimize;
+}
+
+struct gimple_opt_pass pass_optimize_widening_mul =
+{
+ {
+  GIMPLE_PASS,
+  "widening_mul",			/* name */
+  gate_optimize_widening_mul,		/* gate */
+  execute_optimize_widening_mul,	/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_NONE,				/* tv_id */
+  PROP_ssa,				/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0                                     /* todo_flags_finish */
+ }
+};
Index: expr.c
===================================================================
--- expr.c	(revision 156770)
+++ expr.c	(working copy)
@@ -7204,7 +7204,6 @@ expand_expr_real_2 (sepops ops, rtx targ
   optab this_optab;
   rtx subtarget, original_target;
   int ignore;
-  tree subexp0, subexp1;
   bool reduce_bit_field;
   gimple subexp0_def, subexp1_def;
   tree top0, top1;
@@ -7659,13 +7658,7 @@ expand_expr_real_2 (sepops ops, rtx targ
 
       goto binop2;
 
-    case MULT_EXPR:
-      /* If this is a fixed-point operation, then we cannot use the code
-	 below because "expand_mult" doesn't support sat/no-sat fixed-point
-         multiplications.   */
-      if (ALL_FIXED_POINT_MODE_P (mode))
-	goto binop;
-
+    case WIDEN_MULT_EXPR:
       /* If first operand is constant, swap them.
 	 Thus the following special case checks need only
 	 check the second operand.  */
@@ -7676,96 +7669,43 @@ expand_expr_real_2 (sepops ops, rtx targ
 	  treeop1 = t1;
 	}
 
-      /* Attempt to return something suitable for generating an
-	 indexed address, for machines that support that.  */
-
-      if (modifier == EXPAND_SUM && mode == ptr_mode
-	  && host_integerp (treeop1, 0))
-	{
-	  tree exp1 = treeop1;
-
-	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
-			     EXPAND_SUM);
-
-	  if (!REG_P (op0))
-	    op0 = force_operand (op0, NULL_RTX);
-	  if (!REG_P (op0))
-	    op0 = copy_to_mode_reg (mode, op0);
-
-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_low_cst (exp1, 0),
-					     TYPE_MODE (TREE_TYPE (exp1)))));
-	}
-
-      if (modifier == EXPAND_STACK_PARM)
-	target = 0;
-
-      /* Check for multiplying things that have been extended
-	 from a narrower type.  If this machine supports multiplying
-	 in that narrower type with a result in the desired type,
-	 do it that way, and avoid the explicit type-conversion.  */
-
-      subexp0 = treeop0;
-      subexp1 = treeop1;
-      subexp0_def = get_def_for_expr (subexp0, NOP_EXPR);
-      subexp1_def = get_def_for_expr (subexp1, NOP_EXPR);
-      top0 = top1 = NULL_TREE;
+      gcc_assert (TREE_CODE (type) == INTEGER_TYPE);
+      gcc_assert ((TREE_CODE (treeop1) == INTEGER_CST
+		   && int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
+		  || ((2 * TYPE_PRECISION (TREE_TYPE (treeop0))
+		       == TYPE_PRECISION (type))
+		      && (TYPE_PRECISION (TREE_TYPE (treeop0))
+			  == TYPE_PRECISION (TREE_TYPE (treeop1)))));
 
       /* First, check if we have a multiplication of one signed and one
 	 unsigned operand.  */
-      if (subexp0_def
-	  && (top0 = gimple_assign_rhs1 (subexp0_def))
-	  && subexp1_def
-	  && (top1 = gimple_assign_rhs1 (subexp1_def))
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      == TYPE_PRECISION (TREE_TYPE (top1)))
-	  && (TYPE_UNSIGNED (TREE_TYPE (top0))
-	      != TYPE_UNSIGNED (TREE_TYPE (top1))))
+      if (TREE_CODE (treeop1) != INTEGER_CST
+	  && (TYPE_UNSIGNED (TREE_TYPE (treeop0))
+	      != TYPE_UNSIGNED (TREE_TYPE (treeop1))))
 	{
-	  enum machine_mode innermode
-	    = TYPE_MODE (TREE_TYPE (top0));
+	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
 	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_WIDER_MODE (innermode))
+	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
 	    {
 	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
 		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (top0)))
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
+		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		    expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
 				     EXPAND_NORMAL);
 		  else
-		    expand_operands (top0, top1, NULL_RTX, &op1, &op0,
+		    expand_operands (treeop0, treeop1, subtarget, &op1, &op0,
 				     EXPAND_NORMAL);
-
 		  goto binop3;
 		}
 	    }
 	}
-      /* Check for a multiplication with matching signedness.  If
-	 valid, TOP0 and TOP1 were set in the previous if
-	 condition.  */
-      else if (top0
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && ((TREE_CODE (subexp1) == INTEGER_CST
-	       && int_fits_type_p (subexp1, TREE_TYPE (top0))
-	       /* Don't use a widening multiply if a shift will do.  */
-	       && ((GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (subexp1)))
-		    > HOST_BITS_PER_WIDE_INT)
-		   || exact_log2 (TREE_INT_CST_LOW (subexp1)) < 0))
-	      ||
-	      (top1
-	       && (TYPE_PRECISION (TREE_TYPE (top1))
-		   == TYPE_PRECISION (TREE_TYPE (top0))
-	       /* If both operands are extended, they must either both
-		  be zero-extended or both be sign-extended.  */
-	       && (TYPE_UNSIGNED (TREE_TYPE (top1))
-		   == TYPE_UNSIGNED (TREE_TYPE (top0)))))))
+      /* Check for a multiplication with matching signedness.  */
+      else if ((TREE_CODE (treeop1) == INTEGER_CST
+		&& int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
+	       || (TYPE_UNSIGNED (TREE_TYPE (treeop1))
+		   == TYPE_UNSIGNED (TREE_TYPE (treeop0))))
 	{
-	  tree op0type = TREE_TYPE (top0);
+	  tree op0type = TREE_TYPE (treeop0);
 	  enum machine_mode innermode = TYPE_MODE (op0type);
 	  bool zextend_p = TYPE_UNSIGNED (op0type);
 	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
@@ -7775,24 +7715,22 @@ expand_expr_real_2 (sepops ops, rtx targ
 	    {
 	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
 		{
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
-		    expand_operands (top0, subexp1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  goto binop3;
+		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				   EXPAND_NORMAL);
+		  temp = expand_widening_mult (mode, op0, op1, target,
+					       unsignedp, this_optab);
+		  return REDUCE_BIT_FIELD (temp);
 		}
-	      else if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
-		       && innermode == word_mode)
+	      if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
+		  && innermode == word_mode)
 		{
 		  rtx htem, hipart;
-		  op0 = expand_normal (top0);
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
+		  op0 = expand_normal (treeop0);
+		  if (TREE_CODE (treeop1) == INTEGER_CST)
 		    op1 = convert_modes (innermode, mode,
-					 expand_normal (subexp1), unsignedp);
+					 expand_normal (treeop1), unsignedp);
 		  else
-		    op1 = expand_normal (top1);
+		    op1 = expand_normal (treeop1);
 		  temp = expand_binop (mode, other_optab, op0, op1, target,
 				       unsignedp, OPTAB_LIB_WIDEN);
 		  hipart = gen_highpart (innermode, temp);
@@ -7805,7 +7743,53 @@ expand_expr_real_2 (sepops ops, rtx targ
 		}
 	    }
 	}
-      expand_operands (subexp0, subexp1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      treeop0 = fold_build1 (CONVERT_EXPR, type, treeop0);
+      treeop1 = fold_build1 (CONVERT_EXPR, type, treeop1);
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+
+    case MULT_EXPR:
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_mult" doesn't support sat/no-sat fixed-point
+         multiplications.   */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;
+
+      /* If first operand is constant, swap them.
+	 Thus the following special case checks need only
+	 check the second operand.  */
+      if (TREE_CODE (treeop0) == INTEGER_CST)
+	{
+	  tree t1 = treeop0;
+	  treeop0 = treeop1;
+	  treeop1 = t1;
+	}
+
+      /* Attempt to return something suitable for generating an
+	 indexed address, for machines that support that.  */
+
+      if (modifier == EXPAND_SUM && mode == ptr_mode
+	  && host_integerp (treeop1, 0))
+	{
+	  tree exp1 = treeop1;
+
+	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
+			     EXPAND_SUM);
+
+	  if (!REG_P (op0))
+	    op0 = force_operand (op0, NULL_RTX);
+	  if (!REG_P (op0))
+	    op0 = copy_to_mode_reg (mode, op0);
+
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
+			       gen_int_mode (tree_low_cst (exp1, 0),
+					     TYPE_MODE (TREE_TYPE (exp1)))));
+	}
+
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
       return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
 
     case TRUNC_DIV_EXPR:
Index: tree.def
===================================================================
--- tree.def	(revision 156770)
+++ tree.def	(working copy)
@@ -1077,10 +1077,10 @@ DEFTREECODE (DOT_PROD_EXPR, "dot_prod_ex
 DEFTREECODE (WIDEN_SUM_EXPR, "widen_sum_expr", tcc_binary, 2)
 
 /* Widening multiplication.
-   The two arguments are of type t1.
    The result is of type t2, such that t2 is at least twice
-   the size of t1. WIDEN_MULT_EXPR is equivalent to first widening (promoting)
-   the arguments from type t1 to type t2, and then multiplying them.  */
+   the size of the type of each argument. WIDEN_MULT_EXPR is
+   equivalent to first widening (promoting) the arguments from
+   their respective types to type t2, and then multiplying them.  */
 DEFTREECODE (WIDEN_MULT_EXPR, "widen_mult_expr", tcc_binary, 2)
 
 /* Whole vector left/right shift in bits.
Index: expmed.c
===================================================================
--- expmed.c	(revision 156770)
+++ expmed.c	(working copy)
@@ -3253,6 +3253,55 @@ expand_mult (enum machine_mode mode, rtx
   gcc_assert (op0);
   return op0;
 }
+
+/* Perform a widening multiplication and return an rtx for the result.
+   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
+   TARGET is a suggestion for where to store the result (an rtx).
+   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
+   or smul_widen_optab.
+
+   We check specially for a constant integer as OP1, comparing the
+   cost of a widening multiply against the cost of a sequence of shifts
+   and adds.  */
+
+rtx
+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+		      int unsignedp, optab this_optab)
+{
+  bool speed = optimize_insn_for_speed_p ();
+
+  if (CONST_INT_P (op1)
+      && (INTVAL (op1) >= 0
+	  || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
+    {
+      HOST_WIDE_INT coeff = INTVAL (op1);
+      int max_cost;
+      enum mult_variant variant;
+      struct algorithm algorithm;
+
+      /* Special case powers of two.  */
+      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+	{
+	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+	  return expand_shift (LSHIFT_EXPR, mode, op0,
+			       build_int_cst (NULL_TREE, floor_log2 (coeff)),
+			       target, unsignedp);
+	}
+
+      /* Exclude cost of op0 from max_cost to match the cost
+	 calculation of the synth_mult.  */
+      max_cost = mul_widen_cost[speed][mode];
+      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
+			       max_cost))
+	{
+	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+	  return expand_mult_const (mode, op0, coeff, target,
+				    &algorithm, variant);
+	}
+    }
+  return expand_binop (mode, this_optab, op0, op1, target,
+		       unsignedp, OPTAB_LIB_WIDEN);
+}
 
 /* Return the smallest n such that 2**n >= X.  */
 
Index: passes.c
===================================================================
--- passes.c	(revision 156770)
+++ passes.c	(working copy)
@@ -937,6 +937,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt);
       NEXT_PASS (pass_fold_builtins);
+      NEXT_PASS (pass_optimize_widening_mul);
       NEXT_PASS (pass_tail_calls);
       NEXT_PASS (pass_rename_ssa_copies);
       NEXT_PASS (pass_uncprop);
