	* optabs.h (expand_widening_mult): Declare.
	* tree-pass.h (pass_optimize_widening_mul): Declare.
	* tree-ssa-math-opts.c (execute_optimize_widening_mul,
	gate_optimize_widening_mul): New static functions.
	(pass_optimize_widening_mul): New.
	* expr.c (expand_expr_real_2) <case WIDEN_MULT_EXPR>: New case.
	<case MULT_EXPR>: Remove support for widening multiplies.
	* tree.def (WIDEN_MULT_EXPR): Tweak comment.
	* cfgexpand.c (expand_debug_expr) <case WIDEN_MULT_EXPR>: Use
	simplify_gen_unary rather than directly building extensions.
	* tree-cfg.c (verify_gimple_assign_binary): Add tests for
	WIDEN_MULT_EXPR.
	* expmed.c (expand_widening_mult): New function.
	* passes.c (init_optimization_passes): Add pass_optimize_widening_mul.

testsuite/
	* gcc.target/i386/wmul-1.c: New test.
	* gcc.target/i386/wmul-2.c: New test.
	* gcc.target/bfin/wmul-1.c: New test.
	* gcc.target/bfin/wmul-2.c: New test.
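The new tree pass looks for a MULT_EXPR whose operands are extended from
types of exactly half the precision of the result, and rewrites it into a
WIDEN_MULT_EXPR so that expand can use the target's widening-multiply
instructions.  As an illustration of the kind of source this catches
(example only, not part of the patch):

    long long
    f (int a, int b)
    {
      return (long long) a * b;	/* 32x32->64 via one widening multiply */
    }

On i386 this should now expand to a single imull rather than extensions
followed by a 64-bit multiply; the new gcc.target/i386 tests below scan
for exactly that.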
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 158513)
+++ tree-pass.h	(working copy)
@@ -407,6 +407,7 @@ extern struct gimple_opt_pass pass_late_
 extern struct gimple_opt_pass pass_cse_reciprocals;
 extern struct gimple_opt_pass pass_cse_sincos;
 extern struct gimple_opt_pass pass_optimize_bswap;
+extern struct gimple_opt_pass pass_optimize_widening_mul;
 extern struct gimple_opt_pass pass_warn_function_return;
 extern struct gimple_opt_pass pass_warn_function_noreturn;
 extern struct gimple_opt_pass pass_cselim;
Index: tree-ssa-math-opts.c
===================================================================
--- tree-ssa-math-opts.c	(revision 158513)
+++ tree-ssa-math-opts.c	(working copy)
@@ -1260,3 +1260,137 @@ struct gimple_opt_pass pass_optimize_bsw
   0					/* todo_flags_finish */
  }
 };
+
+/* Find integer multiplications where the operands are extended from
+   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
+   where appropriate.  */
+
+static unsigned int
+execute_optimize_widening_mul (void)
+{
+  bool changed = false;
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      gimple_stmt_iterator gsi;
+
+      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple stmt = gsi_stmt (gsi);
+	  gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
+	  tree type, type1 = NULL, type2 = NULL;
+	  tree rhs1, rhs2, rhs1_convop = NULL, rhs2_convop = NULL;
+	  enum tree_code rhs1_code, rhs2_code;
+
+	  if (!is_gimple_assign (stmt)
+	      || gimple_assign_rhs_code (stmt) != MULT_EXPR)
+	    continue;
+
+	  type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+	  if (TREE_CODE (type) != INTEGER_TYPE)
+	    continue;
+
+	  rhs1 = gimple_assign_rhs1 (stmt);
+	  rhs2 = gimple_assign_rhs2 (stmt);
+
+	  if (TREE_CODE (rhs1) == SSA_NAME)
+	    {
+	      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+	      if (!is_gimple_assign (rhs1_stmt))
+		continue;
+	      rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+	      if (rhs1_code != CONVERT_EXPR && rhs1_code != NOP_EXPR)
+		continue;
+	      rhs1_convop = gimple_assign_rhs1 (rhs1_stmt);
+	      type1 = TREE_TYPE (rhs1_convop);
+	      if (TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+		continue;
+	    }
+	  else if (TREE_CODE (rhs1) != INTEGER_CST)
+	    continue;
+
+	  if (TREE_CODE (rhs2) == SSA_NAME)
+	    {
+	      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+	      if (!is_gimple_assign (rhs2_stmt))
+		continue;
+	      rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+	      if (rhs2_code != CONVERT_EXPR && rhs2_code != NOP_EXPR)
+		continue;
+	      rhs2_convop = gimple_assign_rhs1 (rhs2_stmt);
+	      type2 = TREE_TYPE (rhs2_convop);
+	      if (TYPE_PRECISION (type2) * 2 != TYPE_PRECISION (type))
+		continue;
+	    }
+	  else if (TREE_CODE (rhs2) != INTEGER_CST)
+	    continue;
+
+	  /* Verify that the machine can perform a widening multiply in this
+	     mode/signedness combination, otherwise this transformation is
+	     likely to pessimize code.  */
+	  if ((rhs1_stmt == NULL || TYPE_UNSIGNED (type1))
+	      && (rhs2_stmt == NULL || TYPE_UNSIGNED (type2))
+	      && (optab_handler (umul_widen_optab, TYPE_MODE (type))
+		  ->insn_code == CODE_FOR_nothing))
+	    continue;
+	  else if ((rhs1_stmt == NULL || !TYPE_UNSIGNED (type1))
+		   && (rhs2_stmt == NULL || !TYPE_UNSIGNED (type2))
+		   && (optab_handler (smul_widen_optab, TYPE_MODE (type))
+		       ->insn_code == CODE_FOR_nothing))
+	    continue;
+	  else if (rhs1_stmt != NULL && rhs2_stmt != NULL
+		   && (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+		   && (optab_handler (usmul_widen_optab, TYPE_MODE (type))
+		       ->insn_code == CODE_FOR_nothing))
+	    continue;
+
+	  if (rhs1_stmt == NULL && rhs2_stmt == NULL)
+	    continue;
+
+	  if ((rhs1_stmt == NULL && !int_fits_type_p (rhs1, type2))
+	      || (rhs2_stmt == NULL && !int_fits_type_p (rhs2, type1)))
+	    continue;
+
+	  if (rhs1_stmt == NULL)
+	    gimple_assign_set_rhs1 (stmt, fold_convert (type2, rhs1));
+	  else
+	    gimple_assign_set_rhs1 (stmt, rhs1_convop);
+	  if (rhs2_stmt == NULL)
+	    gimple_assign_set_rhs2 (stmt, fold_convert (type1, rhs2));
+	  else
+	    gimple_assign_set_rhs2 (stmt, rhs2_convop);
+	  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
+	  update_stmt (stmt);
+	  changed = true;
+	}
+    }
+  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
+	  | TODO_verify_stmts : 0);
+}
+
+static bool
+gate_optimize_widening_mul (void)
+{
+  return flag_expensive_optimizations && optimize;
+}
+
+struct gimple_opt_pass pass_optimize_widening_mul =
+{
+ {
+  GIMPLE_PASS,
+  "widening_mul",			/* name */
+  gate_optimize_widening_mul,		/* gate */
+  execute_optimize_widening_mul,	/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_NONE,				/* tv_id */
+  PROP_ssa,				/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0					/* todo_flags_finish */
+ }
+};
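For reference, the rewrite above edits the multiply statement in place:
the rhs code becomes WIDEN_MULT_EXPR and the operands are replaced by the
pre-extension values (a constant operand is instead folded to the narrower
type).  In GIMPLE terms, with hypothetical SSA names for a short-by-short
multiply producing an int:

    a.0_1 = (int) a_2;
    b.1_3 = (int) b_4;
    c_5 = a.0_1 * b.1_3;

becomes

    c_5 = a_2 w* b_4;

and the conversion statements, if no longer used, are left for later
cleanup passes.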
Index: expr.c
===================================================================
--- expr.c	(revision 158513)
+++ expr.c	(working copy)
@@ -7217,7 +7217,6 @@ expand_expr_real_2 (sepops ops, rtx targ
   optab this_optab;
   rtx subtarget, original_target;
   int ignore;
-  tree subexp0, subexp1;
   bool reduce_bit_field;
   gimple subexp0_def, subexp1_def;
   tree top0, top1;
@@ -7672,13 +7671,7 @@ expand_expr_real_2 (sepops ops, rtx targ
 
       goto binop2;
 
-    case MULT_EXPR:
-      /* If this is a fixed-point operation, then we cannot use the code
-	 below because "expand_mult" doesn't support sat/no-sat fixed-point
-	 multiplications.  */
-      if (ALL_FIXED_POINT_MODE_P (mode))
-	goto binop;
-
+    case WIDEN_MULT_EXPR:
       /* If first operand is constant, swap them.
	 Thus the following special case checks need only
	 check the second operand.  */
@@ -7689,96 +7682,35 @@ expand_expr_real_2 (sepops ops, rtx targ
	  treeop1 = t1;
	}
 
-      /* Attempt to return something suitable for generating an
-	 indexed address, for machines that support that.  */
-
-      if (modifier == EXPAND_SUM && mode == ptr_mode
-	  && host_integerp (treeop1, 0))
-	{
-	  tree exp1 = treeop1;
-
-	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
-			     EXPAND_SUM);
-
-	  if (!REG_P (op0))
-	    op0 = force_operand (op0, NULL_RTX);
-	  if (!REG_P (op0))
-	    op0 = copy_to_mode_reg (mode, op0);
-
-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_low_cst (exp1, 0),
-					     TYPE_MODE (TREE_TYPE (exp1)))));
-	}
-
-      if (modifier == EXPAND_STACK_PARM)
-	target = 0;
-
-      /* Check for multiplying things that have been extended
-	 from a narrower type.  If this machine supports multiplying
-	 in that narrower type with a result in the desired type,
-	 do it that way, and avoid the explicit type-conversion.  */
-
-      subexp0 = treeop0;
-      subexp1 = treeop1;
-      subexp0_def = get_def_for_expr (subexp0, NOP_EXPR);
-      subexp1_def = get_def_for_expr (subexp1, NOP_EXPR);
-      top0 = top1 = NULL_TREE;
-
       /* First, check if we have a multiplication of one signed and one
	 unsigned operand.  */
-      if (subexp0_def
-	  && (top0 = gimple_assign_rhs1 (subexp0_def))
-	  && subexp1_def
-	  && (top1 = gimple_assign_rhs1 (subexp1_def))
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      == TYPE_PRECISION (TREE_TYPE (top1)))
-	  && (TYPE_UNSIGNED (TREE_TYPE (top0))
-	      != TYPE_UNSIGNED (TREE_TYPE (top1))))
+      if (TREE_CODE (treeop1) != INTEGER_CST
+	  && (TYPE_UNSIGNED (TREE_TYPE (treeop0))
+	      != TYPE_UNSIGNED (TREE_TYPE (treeop1))))
	{
-	  enum machine_mode innermode
-	    = TYPE_MODE (TREE_TYPE (top0));
+	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_WIDER_MODE (innermode))
+	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
	    {
	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (top0)))
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
+		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		    expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
				     EXPAND_NORMAL);
		  else
-		    expand_operands (top0, top1, NULL_RTX, &op1, &op0,
+		    expand_operands (treeop0, treeop1, subtarget, &op1, &op0,
				     EXPAND_NORMAL);
-		  goto binop3;
		}
	    }
	}
-      /* Check for a multiplication with matching signedness.  If
-	 valid, TOP0 and TOP1 were set in the previous if
-	 condition.  */
-      else if (top0
-	  && TREE_CODE (type) == INTEGER_TYPE
-	  && (TYPE_PRECISION (TREE_TYPE (top0))
-	      < TYPE_PRECISION (TREE_TYPE (subexp0)))
-	  && ((TREE_CODE (subexp1) == INTEGER_CST
-	       && int_fits_type_p (subexp1, TREE_TYPE (top0))
-	       /* Don't use a widening multiply if a shift will do.  */
-	       && ((GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (subexp1)))
-		    > HOST_BITS_PER_WIDE_INT)
-		   || exact_log2 (TREE_INT_CST_LOW (subexp1)) < 0))
-	      ||
-	      (top1
-	       && (TYPE_PRECISION (TREE_TYPE (top1))
-		   == TYPE_PRECISION (TREE_TYPE (top0))
-	       /* If both operands are extended, they must either both
-		  be zero-extended or both be sign-extended.  */
-	       && (TYPE_UNSIGNED (TREE_TYPE (top1))
-		   == TYPE_UNSIGNED (TREE_TYPE (top0)))))))
+      /* Check for a multiplication with matching signedness.  */
+      else if ((TREE_CODE (treeop1) == INTEGER_CST
+		&& int_fits_type_p (treeop1, TREE_TYPE (treeop0)))
+	       || (TYPE_UNSIGNED (TREE_TYPE (treeop1))
+		   == TYPE_UNSIGNED (TREE_TYPE (treeop0))))
	{
-	  tree op0type = TREE_TYPE (top0);
+	  tree op0type = TREE_TYPE (treeop0);
	  enum machine_mode innermode = TYPE_MODE (op0type);
	  bool zextend_p = TYPE_UNSIGNED (op0type);
	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
@@ -7788,24 +7720,22 @@ expand_expr_real_2 (sepops ops, rtx targ
	    {
	      if (optab_handler (this_optab, mode)->insn_code != CODE_FOR_nothing)
		{
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
-		    expand_operands (top0, subexp1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (top0, top1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  goto binop3;
+		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				   EXPAND_NORMAL);
+		  temp = expand_widening_mult (mode, op0, op1, target,
+					       unsignedp, this_optab);
+		  return REDUCE_BIT_FIELD (temp);
		}
-	      else if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
-		       && innermode == word_mode)
+	      if (optab_handler (other_optab, mode)->insn_code != CODE_FOR_nothing
+		  && innermode == word_mode)
		{
		  rtx htem, hipart;
-		  op0 = expand_normal (top0);
-		  if (TREE_CODE (subexp1) == INTEGER_CST)
+		  op0 = expand_normal (treeop0);
+		  if (TREE_CODE (treeop1) == INTEGER_CST)
		    op1 = convert_modes (innermode, mode,
-					 expand_normal (subexp1), unsignedp);
+					 expand_normal (treeop1), unsignedp);
		  else
-		    op1 = expand_normal (top1);
+		    op1 = expand_normal (treeop1);
		  temp = expand_binop (mode, other_optab, op0, op1, target,
				       unsignedp, OPTAB_LIB_WIDEN);
		  hipart = gen_highpart (innermode, temp);
@@ -7818,7 +7748,53 @@ expand_expr_real_2 (sepops ops, rtx targ
	    }
	}
-      expand_operands (subexp0, subexp1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      treeop0 = fold_build1 (CONVERT_EXPR, type, treeop0);
+      treeop1 = fold_build1 (CONVERT_EXPR, type, treeop1);
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+
+    case MULT_EXPR:
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_mult" doesn't support sat/no-sat fixed-point
+	 multiplications.  */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;
+
+      /* If first operand is constant, swap them.
+	 Thus the following special case checks need only
+	 check the second operand.  */
+      if (TREE_CODE (treeop0) == INTEGER_CST)
+	{
+	  tree t1 = treeop0;
+	  treeop0 = treeop1;
+	  treeop1 = t1;
+	}
+
+      /* Attempt to return something suitable for generating an
+	 indexed address, for machines that support that.  */
+
+      if (modifier == EXPAND_SUM && mode == ptr_mode
+	  && host_integerp (treeop1, 0))
+	{
+	  tree exp1 = treeop1;
+
+	  op0 = expand_expr (treeop0, subtarget, VOIDmode,
+			     EXPAND_SUM);
+
+	  if (!REG_P (op0))
+	    op0 = force_operand (op0, NULL_RTX);
+	  if (!REG_P (op0))
+	    op0 = copy_to_mode_reg (mode, op0);
+
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
+			       gen_int_mode (tree_low_cst (exp1, 0),
+					     TYPE_MODE (TREE_TYPE (exp1)))));
+	}
+
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
       return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
 
     case TRUNC_DIV_EXPR:
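Note that the WIDEN_MULT_EXPR case above no longer needs to look through
conversions; the operand types themselves carry the extension information.
For the mixed-signedness path (illustration only, not part of the patch):

    long long
    g (unsigned int u, int s)
    {
      return (long long) u * s;
    }

expand_operands is called so that the zero-extended operand always lands
in op0 before the usmul_widen_optab expansion.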
Index: expmed.c
===================================================================
--- expmed.c	(revision 158513)
+++ expmed.c	(working copy)
@@ -3217,6 +3217,55 @@ expand_mult (enum machine_mode mode, rtx
   gcc_assert (op0);
   return op0;
 }
+
+/* Perform a widening multiplication and return an rtx for the result.
+   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
+   TARGET is a suggestion for where to store the result (an rtx).
+   THIS_OPTAB is the optab we should use; it must be either
+   umul_widen_optab or smul_widen_optab.
+
+   We check specially for a constant integer as OP1, comparing the
+   cost of a widening multiply against the cost of a sequence of shifts
+   and adds.  */
+
+rtx
+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+		      int unsignedp, optab this_optab)
+{
+  bool speed = optimize_insn_for_speed_p ();
+
+  if (CONST_INT_P (op1)
+      && (INTVAL (op1) >= 0
+	  || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
+    {
+      HOST_WIDE_INT coeff = INTVAL (op1);
+      int max_cost;
+      enum mult_variant variant;
+      struct algorithm algorithm;
+
+      /* Special case powers of two.  */
+      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+	{
+	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+	  return expand_shift (LSHIFT_EXPR, mode, op0,
+			       build_int_cst (NULL_TREE, floor_log2 (coeff)),
+			       target, unsignedp);
+	}
+
+      /* Exclude cost of op0 from max_cost to match the cost
+	 calculation of the synth_mult.  */
+      max_cost = mul_widen_cost[speed][mode];
+      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
+			       max_cost))
+	{
+	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+	  return expand_mult_const (mode, op0, coeff, target,
+				    &algorithm, variant);
+	}
+    }
+  return expand_binop (mode, this_optab, op0, op1, target,
+		       unsignedp, OPTAB_LIB_WIDEN);
+}
 
 /* Return the smallest n such that 2**n >= X.  */
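Two constant special cases in expand_widening_mult are worth noting.  A
power-of-two coefficient becomes a widening conversion plus a shift, since
for example (illustration only, not part of the patch):

    unsigned long long
    times8 (unsigned int x)
    {
      return (unsigned long long) x * 8;	/* == (unsigned long long) x << 3 */
    }

For other constants, choose_mult_variant is given mul_widen_cost as the
budget, so a shift-and-add sequence from synth_mult is used only when it
is cheaper than the widening multiply itself.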
Index: cfgexpand.c
===================================================================
--- cfgexpand.c	(revision 158513)
+++ cfgexpand.c	(working copy)
@@ -3007,14 +3007,15 @@ expand_debug_expr (tree exp)
       if (SCALAR_INT_MODE_P (GET_MODE (op0))
	  && SCALAR_INT_MODE_P (mode))
	{
+	  enum machine_mode inner_mode = GET_MODE (op0);
	  if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 0))))
-	    op0 = gen_rtx_ZERO_EXTEND (mode, op0);
+	    op0 = simplify_gen_unary (ZERO_EXTEND, mode, op0, inner_mode);
	  else
-	    op0 = gen_rtx_SIGN_EXTEND (mode, op0);
+	    op0 = simplify_gen_unary (SIGN_EXTEND, mode, op0, inner_mode);
	  if (TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 1))))
-	    op1 = gen_rtx_ZERO_EXTEND (mode, op1);
+	    op1 = simplify_gen_unary (ZERO_EXTEND, mode, op1, inner_mode);
	  else
-	    op1 = gen_rtx_SIGN_EXTEND (mode, op1);
+	    op1 = simplify_gen_unary (SIGN_EXTEND, mode, op1, inner_mode);
	  return gen_rtx_MULT (mode, op0, op1);
	}
       return NULL;
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 158513)
+++ tree-cfg.c	(working copy)
@@ -3455,8 +3455,13 @@ do_pointer_plus_expr_check:
	 connected to the operand types.  */
       return verify_gimple_comparison (lhs_type, rhs1, rhs2);
 
-    case WIDEN_SUM_EXPR:
     case WIDEN_MULT_EXPR:
+      if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+	return true;
+      return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+	      || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
+
+    case WIDEN_SUM_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
     case VEC_PACK_TRUNC_EXPR:
Index: passes.c
===================================================================
--- passes.c	(revision 158513)
+++ passes.c	(working copy)
@@ -944,6 +944,7 @@ init_optimization_passes (void)
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt);
       NEXT_PASS (pass_fold_builtins);
+      NEXT_PASS (pass_optimize_widening_mul);
       NEXT_PASS (pass_tail_calls);
       NEXT_PASS (pass_rename_ssa_copies);
       NEXT_PASS (pass_uncprop);
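The new verifier case enforces the shape the optimizers rely on: an
integer result whose precision is exactly twice that of the operands, with
both operands of equal precision (verify_gimple_assign_binary returns true
on failure).  For illustration, with 16-bit short and 32-bit int
(hypothetical GIMPLE):

    int_res = WIDEN_MULT_EXPR <short_a, short_b>;	/* OK: 2 * 16 == 32 */
    int_res = WIDEN_MULT_EXPR <char_a, short_b>;	/* rejected: 8 != 16 */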
Index: testsuite/gcc.target/i386/wmul-2.c
===================================================================
--- testsuite/gcc.target/i386/wmul-2.c	(revision 0)
+++ testsuite/gcc.target/i386/wmul-2.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void vec_mpy(int y[], const int x[], int scaler)
+{
+  int i;
+
+  for (i = 0; i < 150; i++)
+    y[i] += (((long long)scaler * x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-times "imull" 1 } } */
Index: testsuite/gcc.target/i386/wmul-1.c
===================================================================
--- testsuite/gcc.target/i386/wmul-1.c	(revision 0)
+++ testsuite/gcc.target/i386/wmul-1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long mac(const int *a, const int *b, long long sqr, long long *sum)
+{
+  int i;
+  long long dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += (long long)b[i] * a[i];
+    sqr += (long long)b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "imull" 2 } } */
Index: testsuite/gcc.target/bfin/wmul-1.c
===================================================================
--- testsuite/gcc.target/bfin/wmul-1.c	(revision 0)
+++ testsuite/gcc.target/bfin/wmul-1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int mac(const short *a, const short *b, int sqr, int *sum)
+{
+  int i;
+  int dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += b[i] * a[i];
+    sqr += b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "\\(IS\\)" 2 } } */
Index: testsuite/gcc.target/bfin/wmul-2.c
===================================================================
--- testsuite/gcc.target/bfin/wmul-2.c	(revision 0)
+++ testsuite/gcc.target/bfin/wmul-2.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void vec_mpy(int y[], const short x[], short scaler)
+{
+  int i;
+
+  for (i = 0; i < 150; i++)
+    y[i] += ((scaler * x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-times "\\(IS\\)" 1 } } */
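For checking the pass by hand, the standard dump machinery should apply
(an assumption based on the pass name registered above): compiling with
-O2 -fdump-tree-widening_mul should show the converted statements with the
w* operator.  The i386 tests scan for imull because the widening multiply
keeps the 32-bit multiply instruction where a full 64-bit multiply
sequence would otherwise be emitted; the Blackfin tests scan for the (IS)
suffix that port uses on its 16-bit multiply instructions.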