This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] FMA on trees
On Tue, 2 Nov 2010, Richard Henderson wrote:
> On 11/02/2010 06:24 AM, Richard Guenther wrote:
> > *************** set_unsafe_math_optimizations_flags (int
> > *** 2289,2294 ****
> > --- 2301,2307 ----
> > flag_signed_zeros = !set;
> > flag_associative_math = set;
> > flag_reciprocal_math = set;
> > + flag_fp_contract_mode = set ? FP_CONTRACT_FAST : FP_CONTRACT_OFF;
>
> Off? Default is FAST anyway. I can't think what to set here
> for -fno-fast-math, actually.
Yeah, I wondered about that, too. I've now just removed -ffp-contract
from unsafe-math handling completely. We can revisit that if we
change the default and/or implement ON.
> > + fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
> > + {
> > + if (validate_arg (arg0, REAL_TYPE)
> > + && validate_arg(arg1, REAL_TYPE)
> > + && validate_arg(arg2, REAL_TYPE))
> > + {
> > + if (TREE_CODE (arg0) == REAL_CST
> > + && TREE_CODE (arg1) == REAL_CST
> > + && TREE_CODE (arg2) == REAL_CST)
> > + return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
> > +
> > + /* ??? Only expand to FMA_EXPR if it's directly supported. */
> > + if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
> > + return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
>
> I know this is my bit, but I just remembered that this means we
> need a new entry in fold_ternary_loc to fold FMA_EXPR.
Done.
> Finally, if !flag_signed_zeros (part of fast-math), we probably
> want to fold -fma(a,b,c) -> fma(-a,b,c).
Hm, that doesn't sound correct to me ;)
The following patch also adds vectorization support, which fixes the
regressions seen.
Bootstrap and regtest pending.
Richard.
2010-10-22 Richard Guenther <rguenther@suse.de>
Richard Henderson <rth@redhat.com>
* tree.def (FMA_EXPR): New tree code.
* expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
* gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
* tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
* tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
* tree-ssa-math-opts.c (convert_mult_to_fma): New function.
(execute_optimize_widening_mul): Call it. Reorganize to allow
dead stmt removal. Move TODO flags ...
(pass_optimize_widening_mul): ... here.
* flag-types.h (enum fp_contract_mode): New enum.
* common.opt (flag_fp_contract_mode): New variable.
(-ffp-contract): New option.
* opts.c (common_handle_option): Handle it.
* doc/invoke.texi (-ffp-contract): Document.
* tree.h (fold_fma): Declare.
* builtins.c (fold_fma): New function.
(fold_builtin_fma): Likewise.
(fold_builtin_3): Call it for fma.
* fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
* optabs.c (optab_for_tree_code): Handle FMA_EXPR.
* config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
New expanders.
* doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
named patterns.
* genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
* optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
(fms_optab, fnma_optab, fnms_optab): New defines.
* gimplify.c (gimplify_expr): Handle binary truth expressions
explicitly. Handle FMA_EXPR.
* tree-vect-stmts.c (vectorizable_operation): Handle ternary
operations.
* gcc.target/i386/fma4-vector-2.c: New testcase.
Index: gcc/tree.def
===================================================================
*** gcc/tree.def.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/tree.def 2010-11-03 13:21:19.000000000 +0100
*************** DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "wide
*** 1092,1097 ****
--- 1092,1103 ----
is subtracted from t3. */
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
+ /* Fused multiply-add.
+ All operands and the result are of the same type. No intermediate
+ rounding is performed after multiplying operand one with operand two
+ before adding operand three. */
+ DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
+
/* Whole vector left/right shift in bits.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits. */
Index: gcc/expr.c
===================================================================
*** gcc/expr.c.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/expr.c 2010-11-03 13:21:19.000000000 +0100
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7254,7260 ****
int ignore;
bool reduce_bit_field;
location_t loc = ops->location;
! tree treeop0, treeop1;
#define REDUCE_BIT_FIELD(expr) (reduce_bit_field \
? reduce_to_bit_field_precision ((expr), \
target, \
--- 7254,7260 ----
int ignore;
bool reduce_bit_field;
location_t loc = ops->location;
! tree treeop0, treeop1, treeop2;
#define REDUCE_BIT_FIELD(expr) (reduce_bit_field \
? reduce_to_bit_field_precision ((expr), \
target, \
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7267,7272 ****
--- 7267,7273 ----
treeop0 = ops->op0;
treeop1 = ops->op1;
+ treeop2 = ops->op2;
/* We should be called only on simple (binary or unary) expressions,
exactly those that are valid in gimple expressions that aren't
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7624,7630 ****
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
! op2 = expand_normal (ops->op2);
target = expand_widen_pattern_expr (ops, op0, op1, op2,
target, unsignedp);
return target;
--- 7625,7631 ----
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
! op2 = expand_normal (treeop2);
target = expand_widen_pattern_expr (ops, op0, op1, op2,
target, unsignedp);
return target;
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7711,7716 ****
--- 7712,7757 ----
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+ case FMA_EXPR:
+ {
+ optab opt = fma_optab;
+ gimple def0, def2;
+
+ def0 = get_def_for_expr (treeop0, NEGATE_EXPR);
+ def2 = get_def_for_expr (treeop2, NEGATE_EXPR);
+
+ op0 = op2 = NULL;
+
+ if (def0 && def2
+ && optab_handler (fnms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnms_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+ else if (def0
+ && optab_handler (fnma_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnma_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ }
+ else if (def2
+ && optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fms_optab;
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+
+ if (op0 == NULL)
+ op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL);
+ if (op2 == NULL)
+ op2 = expand_normal (treeop2);
+ op1 = expand_normal (treeop1);
+
+ return expand_ternary_op (TYPE_MODE (type), opt,
+ op0, op1, op2, target, 0);
+ }
+
case MULT_EXPR:
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_mult" doesn't support sat/no-sat fixed-point
Index: gcc/gimple.c
===================================================================
*** gcc/gimple.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/gimple.c 2010-11-03 13:21:19.000000000 +0100
*************** get_gimple_rhs_num_ops (enum tree_code c
*** 2529,2535 ****
|| (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \
: (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \
: ((SYM) == WIDEN_MULT_PLUS_EXPR \
! || (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \
--- 2529,2536 ----
|| (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \
: (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \
: ((SYM) == WIDEN_MULT_PLUS_EXPR \
! || (SYM) == WIDEN_MULT_MINUS_EXPR \
! || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \
Index: gcc/tree-cfg.c
===================================================================
*** gcc/tree-cfg.c.orig 2010-11-02 16:37:53.000000000 +0100
--- gcc/tree-cfg.c 2010-11-03 13:21:19.000000000 +0100
*************** verify_gimple_assign_ternary (gimple stm
*** 3748,3753 ****
--- 3748,3767 ----
}
break;
+ case FMA_EXPR:
+ if (!useless_type_conversion_p (lhs_type, rhs1_type)
+ || !useless_type_conversion_p (lhs_type, rhs2_type)
+ || !useless_type_conversion_p (lhs_type, rhs3_type))
+ {
+ error ("type mismatch in fused multiply-add expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+ break;
+
default:
gcc_unreachable ();
}
Index: gcc/tree-inline.c
===================================================================
*** gcc/tree-inline.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/tree-inline.c 2010-11-03 13:21:19.000000000 +0100
*************** estimate_operator_cost (enum tree_code c
*** 3283,3288 ****
--- 3283,3289 ----
case POINTER_PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ case FMA_EXPR:
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:
Index: gcc/gimple-pretty-print.c
===================================================================
*** gcc/gimple-pretty-print.c.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/gimple-pretty-print.c 2010-11-03 13:21:19.000000000 +0100
*************** dump_ternary_rhs (pretty_printer *buffer
*** 400,405 ****
--- 400,413 ----
pp_character (buffer, '>');
break;
+ case FMA_EXPR:
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, " * ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, " + ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ break;
+
default:
gcc_unreachable ();
}
Index: gcc/tree-ssa-math-opts.c
===================================================================
*** gcc/tree-ssa-math-opts.c.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/tree-ssa-math-opts.c 2010-11-03 13:21:19.000000000 +0100
*************** convert_plusminus_to_widen (gimple_stmt_
*** 1494,1499 ****
--- 1494,1616 ----
return true;
}
+ /* Combine the multiplication at MUL_STMT with uses in additions and
+ subtractions to form fused multiply-add operations. Returns true
+ if successful and MUL_STMT should be removed. */
+
+ static bool
+ convert_mult_to_fma (gimple mul_stmt)
+ {
+ tree mul_result = gimple_assign_lhs (mul_stmt);
+ tree type = TREE_TYPE (mul_result);
+ gimple use_stmt, fma_stmt;
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+
+ if (FLOAT_TYPE_P (type)
+ && flag_fp_contract_mode == FP_CONTRACT_OFF)
+ return false;
+
+ /* We don't want to do bitfield reduction ops. */
+ if (INTEGRAL_TYPE_P (type)
+ && (TYPE_PRECISION (type)
+ != GET_MODE_PRECISION (TYPE_MODE (type))))
+ return false;
+
+ /* If the target doesn't support it, don't generate it. We assume that
+ if fma isn't available then fms, fnma or fnms are not either. */
+ if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* Make sure that the multiplication statement becomes dead after
+ the transformation, thus that all uses are transformed to FMAs.
+ This means we assume that an FMA operation has the same cost
+ as an addition. */
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+ {
+ enum tree_code use_code;
+
+ use_stmt = USE_STMT (use_p);
+
+ if (!is_gimple_assign (use_stmt))
+ return false;
+ use_code = gimple_assign_rhs_code (use_stmt);
+ /* ??? We need to handle NEGATE_EXPR to eventually form fnms. */
+ if (use_code != PLUS_EXPR
+ && use_code != MINUS_EXPR)
+ return false;
+
+ /* For now restrict this operation to single basic blocks. In theory
+ we would want to support sinking the multiplication in
+ m = a*b;
+ if ()
+ ma = m + c;
+ else
+ d = m;
+ to form a fma in the then block and sink the multiplication to the
+ else block. */
+ if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+ return false;
+
+ /* We can't handle a * b + a * b. */
+ if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+ return false;
+
+ /* If the target doesn't support a * b - c then drop the ball. */
+ if (gimple_assign_rhs1 (use_stmt) == mul_result
+ && use_code == MINUS_EXPR
+ && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* If the target doesn't support -a * b + c then drop the ball. */
+ if (gimple_assign_rhs2 (use_stmt) == mul_result
+ && use_code == MINUS_EXPR
+ && optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* We don't generate -a * b - c below yet. */
+ }
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+ {
+ tree addop, mulop1;
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+
+ mulop1 = gimple_assign_rhs1 (mul_stmt);
+ if (gimple_assign_rhs1 (use_stmt) == mul_result)
+ {
+ addop = gimple_assign_rhs2 (use_stmt);
+ /* a * b - c -> a * b + (-c) */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ addop = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, addop),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+ else
+ {
+ addop = gimple_assign_rhs1 (use_stmt);
+ /* a - b * c -> (-b) * c + a */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ mulop1 = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, mulop1),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+
+ fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+ gimple_assign_lhs (use_stmt),
+ mulop1,
+ gimple_assign_rhs2 (mul_stmt),
+ addop);
+ gsi_replace (&gsi, fma_stmt, true);
+ }
+
+ return true;
+ }
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
*************** convert_plusminus_to_widen (gimple_stmt_
*** 1501,1531 ****
static unsigned int
execute_optimize_widening_mul (void)
{
- bool changed = false;
basic_block bb;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
! for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
! if (!is_gimple_assign (stmt))
! continue;
!
! code = gimple_assign_rhs_code (stmt);
! if (code == MULT_EXPR)
! changed |= convert_mult_to_widen (stmt);
! else if (code == PLUS_EXPR || code == MINUS_EXPR)
! changed |= convert_plusminus_to_widen (&gsi, stmt, code);
}
}
! return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
! | TODO_verify_stmts : 0);
}
static bool
--- 1618,1662 ----
static unsigned int
execute_optimize_widening_mul (void)
{
basic_block bb;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
! for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
! if (is_gimple_assign (stmt))
! {
! code = gimple_assign_rhs_code (stmt);
! switch (code)
! {
! case MULT_EXPR:
! if (!convert_mult_to_widen (stmt)
! && convert_mult_to_fma (stmt))
! {
! gsi_remove (&gsi, true);
! release_defs (stmt);
! continue;
! }
! break;
!
! case PLUS_EXPR:
! case MINUS_EXPR:
! convert_plusminus_to_widen (&gsi, stmt, code);
! break;
!
! default:;
! }
! }
! gsi_next (&gsi);
}
}
! return 0;
}
static bool
*************** struct gimple_opt_pass pass_optimize_wid
*** 1549,1554 ****
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
! 0 /* todo_flags_finish */
}
};
--- 1680,1688 ----
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
! TODO_verify_ssa
! | TODO_verify_stmts
! | TODO_dump_func
! | TODO_update_ssa /* todo_flags_finish */
}
};
Index: gcc/testsuite/gcc.target/i386/fma4-vector-2.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.target/i386/fma4-vector-2.c 2010-11-03 13:21:19.000000000 +0100
***************
*** 0 ****
--- 1,21 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target lp64 } */
+ /* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
+
+ float r[256], s[256];
+ float x[256];
+ float y[256];
+ float z[256];
+
+ void foo (void)
+ {
+ int i;
+ for (i = 0; i < 256; ++i)
+ {
+ r[i] = x[i] * y[i] - z[i];
+ s[i] = x[i] * y[i] + z[i];
+ }
+ }
+
+ /* { dg-final { scan-assembler "vfmaddps" } } */
+ /* { dg-final { scan-assembler "vfmsubps" } } */
Index: gcc/common.opt
===================================================================
*** gcc/common.opt.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/common.opt 2010-11-03 13:21:19.000000000 +0100
*************** bool flag_warn_unused_result = false
*** 58,63 ****
--- 58,67 ----
Variable
int *param_values
+ ; Floating-point contraction mode, fast by default.
+ Variable
+ enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST
+
###
Driver
*************** fforward-propagate
*** 857,862 ****
--- 861,870 ----
Common Report Var(flag_forward_propagate) Optimization
Perform a forward propagation pass on RTL
+ ffp-contract=
+ Common Joined RejectNegative
+ -ffp-contract=[off|on|fast] Perform floating-point expression contraction.
+
; Nonzero means don't put addresses of constant functions in registers.
; Used for compiling the Unix kernel, where strange substitutions are
; done on the assembly output.
Index: gcc/doc/invoke.texi
===================================================================
*** gcc/doc/invoke.texi.orig 2010-11-02 16:37:50.000000000 +0100
--- gcc/doc/invoke.texi 2010-11-03 13:21:19.000000000 +0100
*************** Objective-C and Objective-C++ Dialects}.
*** 342,348 ****
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
-ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
! -fforward-propagate -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
-finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
--- 342,348 ----
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
-ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
! -fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
-finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
*************** loop unrolling.
*** 5980,5985 ****
--- 5980,5997 ----
This option is enabled by default at optimization levels @option{-O},
@option{-O2}, @option{-O3}, @option{-Os}.
+ @item -ffp-contract=@var{style}
+ @opindex ffp-contract
+ @option{-ffp-contract=off} disables floating-point expression contraction.
+ @option{-ffp-contract=fast} enables floating-point expression contraction
+ such as forming of fused multiply-add operations if the target has
+ native support for them.
+ @option{-ffp-contract=on} enables floating-point expression contraction
+ if allowed by the language standard. This is currently not implemented
+ and is treated the same as @option{-ffp-contract=off}.
+
+ The default is @option{-ffp-contract=fast}.
+
@item -fomit-frame-pointer
@opindex fomit-frame-pointer
Don't keep the frame pointer in a register for functions that
*************** an exact implementation of IEEE or ISO r
*** 7816,7822 ****
math functions. It may, however, yield faster code for programs
that do not require the guarantees of these specifications.
Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
! @option{-fassociative-math} and @option{-freciprocal-math}.
The default is @option{-fno-unsafe-math-optimizations}.
--- 7828,7835 ----
math functions. It may, however, yield faster code for programs
that do not require the guarantees of these specifications.
Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
! @option{-fassociative-math}, @option{-freciprocal-math} and
! @option{-ffp-contract=fast}.
The default is @option{-fno-unsafe-math-optimizations}.
Index: gcc/opts.c
===================================================================
*** gcc/opts.c.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/opts.c 2010-11-03 13:43:06.000000000 +0100
*************** common_handle_option (struct gcc_options
*** 1901,1906 ****
--- 1901,1918 ----
return false;
break;
+ case OPT_ffp_contract_:
+ if (!strcmp (arg, "on"))
+ /* Not implemented, fall back to conservative FP_CONTRACT_OFF. */
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "off"))
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "fast"))
+ flag_fp_contract_mode = FP_CONTRACT_FAST;
+ else
+ error ("unknown floating point contraction style \"%s\"", arg);
+ break;
+
case OPT_fexcess_precision_:
if (!strcmp (arg, "fast"))
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/builtins.c 2010-11-03 13:34:38.000000000 +0100
*************** fold_builtin_abs (location_t loc, tree a
*** 9266,9271 ****
--- 9266,9304 ----
return fold_build1_loc (loc, ABS_EXPR, type, arg);
}
+ /* Fold a fma operation with arguments ARG[012]. */
+
+ tree
+ fold_fma (location_t loc, tree type, tree arg0, tree arg1, tree arg2)
+ {
+ if (TREE_CODE (arg0) == REAL_CST
+ && TREE_CODE (arg1) == REAL_CST
+ && TREE_CODE (arg2) == REAL_CST)
+ return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+
+ return NULL_TREE;
+ }
+
+ /* Fold a call to fma, fmaf, or fmal with arguments ARG[012]. */
+
+ static tree
+ fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+ {
+ if (validate_arg (arg0, REAL_TYPE)
+ && validate_arg(arg1, REAL_TYPE)
+ && validate_arg(arg2, REAL_TYPE))
+ {
+ tree tem = fold_fma (loc, type, arg0, arg1, arg2);
+ if (tem)
+ return tem;
+
+ /* ??? Only expand to FMA_EXPR if it's directly supported. */
+ if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+ return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+ }
+ return NULL_TREE;
+ }
+
/* Fold a call to builtin fmin or fmax. */
static tree
*************** fold_builtin_3 (location_t loc, tree fnd
*** 10540,10549 ****
return fold_builtin_sincos (loc, arg0, arg1, arg2);
CASE_FLT_FN (BUILT_IN_FMA):
! if (validate_arg (arg0, REAL_TYPE)
! && validate_arg(arg1, REAL_TYPE)
! && validate_arg(arg2, REAL_TYPE))
! return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
break;
CASE_FLT_FN (BUILT_IN_REMQUO):
--- 10573,10579 ----
return fold_builtin_sincos (loc, arg0, arg1, arg2);
CASE_FLT_FN (BUILT_IN_FMA):
! return fold_builtin_fma (loc, arg0, arg1, arg2, type);
break;
CASE_FLT_FN (BUILT_IN_REMQUO):
Index: gcc/config/i386/sse.md
===================================================================
*** gcc/config/i386/sse.md.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/config/i386/sse.md 2010-11-03 13:21:19.000000000 +0100
***************
*** 1859,1865 ****
;; Intrinsic FMA operations.
! ;; The standard name for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
--- 1859,1865 ----
;; Intrinsic FMA operations.
! ;; The standard names for fma are only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
***************
*** 1869,1874 ****
--- 1869,1901 ----
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
+ (define_expand "fms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+ (define_expand "fnma<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+ (define_expand "fnms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
Index: gcc/doc/md.texi
===================================================================
*** gcc/doc/md.texi.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/doc/md.texi 2010-11-03 13:21:19.000000000 +0100
*************** pattern is used to implement the @code{f
*** 3958,3963 ****
--- 3958,3993 ----
multiply followed by the add if the machine does not perform a
rounding step between the operations.
+ @cindex @code{fms@var{m}4} instruction pattern
+ @item @samp{fms@var{m}4}
+ Like @code{fma@var{m}4}, except operand 3 is subtracted from the
+ product instead of added to the product. This is represented
+ in the rtl as
+
+ @smallexample
+ (fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3}))
+ @end smallexample
+
+ @cindex @code{fnma@var{m}4} instruction pattern
+ @item @samp{fnma@var{m}4}
+ Like @code{fma@var{m}4} except that the intermediate product
+ is negated before being added to operand 3. This is represented
+ in the rtl as
+
+ @smallexample
+ (fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3})
+ @end smallexample
+
+ @cindex @code{fnms@var{m}4} instruction pattern
+ @item @samp{fnms@var{m}4}
+ Like @code{fms@var{m}4} except that the intermediate product
+ is negated before subtracting operand 3. This is represented
+ in the rtl as
+
+ @smallexample
+ (fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3}))
+ @end smallexample
+
@cindex @code{min@var{m}3} instruction pattern
@cindex @code{max@var{m}3} instruction pattern
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}
Index: gcc/flag-types.h
===================================================================
*** gcc/flag-types.h.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/flag-types.h 2010-11-03 13:21:19.000000000 +0100
*************** enum warn_strict_overflow_code
*** 152,155 ****
--- 152,162 ----
WARN_STRICT_OVERFLOW_MAGNITUDE = 5
};
+ /* Floating-point contraction mode. */
+ enum fp_contract_mode {
+ FP_CONTRACT_OFF = 0,
+ FP_CONTRACT_ON = 1,
+ FP_CONTRACT_FAST = 2
+ };
+
#endif /* ! GCC_FLAG_TYPES_H */
Index: gcc/genopinit.c
===================================================================
*** gcc/genopinit.c.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/genopinit.c 2010-11-03 13:21:19.000000000 +0100
*************** static const char * const optabs[] =
*** 160,165 ****
--- 160,168 ----
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
"set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
+ "set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))",
+ "set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))",
+ "set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))",
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
Index: gcc/gimplify.c
===================================================================
*** gcc/gimplify.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/gimplify.c 2010-11-03 13:21:19.000000000 +0100
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7170,7175 ****
--- 7170,7185 ----
ret = gimplify_omp_atomic (expr_p, pre_p);
break;
+ case TRUTH_AND_EXPR:
+ case TRUTH_OR_EXPR:
+ case TRUTH_XOR_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_2;
+
+ case FMA_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_3;
+
case POINTER_PLUS_EXPR:
/* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
The second is gimple immediate saving a need for extra statement.
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7249,7264 ****
break;
}
case tcc_declaration:
case tcc_constant:
ret = GS_ALL_DONE;
goto dont_recalculate;
default:
! gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
! || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
! || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
! goto expr_2;
}
recalculate_side_effects (*expr_p);
--- 7259,7286 ----
break;
}
+ expr_3:
+ {
+ enum gimplify_status r0, r1, r2;
+
+ r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+
+ ret = MIN (MIN (r0, r1), r2);
+ break;
+ }
+
case tcc_declaration:
case tcc_constant:
ret = GS_ALL_DONE;
goto dont_recalculate;
default:
! gcc_unreachable ();
}
recalculate_side_effects (*expr_p);
Index: gcc/optabs.h
===================================================================
*** gcc/optabs.h.orig 2010-11-02 16:37:35.000000000 +0100
--- gcc/optabs.h 2010-11-03 13:21:19.000000000 +0100
*************** enum optab_index
*** 192,197 ****
--- 192,200 ----
OTI_atan2,
/* Floating multiply/add */
OTI_fma,
+ OTI_fms,
+ OTI_fnma,
+ OTI_fnms,
/* Move instruction. */
OTI_mov,
*************** enum optab_index
*** 435,440 ****
--- 438,446 ----
#define pow_optab (&optab_table[OTI_pow])
#define atan2_optab (&optab_table[OTI_atan2])
#define fma_optab (&optab_table[OTI_fma])
+ #define fms_optab (&optab_table[OTI_fms])
+ #define fnma_optab (&optab_table[OTI_fnma])
+ #define fnms_optab (&optab_table[OTI_fnms])
#define mov_optab (&optab_table[OTI_mov])
#define movstrict_optab (&optab_table[OTI_movstrict])
Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/fold-const.c 2010-11-03 13:50:07.000000000 +0100
*************** contains_label_p (tree st)
*** 13281,13290 ****
tree
fold_ternary_loc (location_t loc, enum tree_code code, tree type,
! tree op0, tree op1, tree op2)
{
tree tem;
! tree arg0 = NULL_TREE, arg1 = NULL_TREE;
enum tree_code_class kind = TREE_CODE_CLASS (code);
gcc_assert (IS_EXPR_CODE_CLASS (kind)
--- 13281,13290 ----
tree
fold_ternary_loc (location_t loc, enum tree_code code, tree type,
! tree op0, tree op1, tree op2)
{
tree tem;
! tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE;
enum tree_code_class kind = TREE_CODE_CLASS (code);
gcc_assert (IS_EXPR_CODE_CLASS (kind)
*************** fold_ternary_loc (location_t loc, enum t
*** 13312,13317 ****
--- 13312,13323 ----
STRIP_NOPS (arg1);
}
+ if (op2)
+ {
+ arg2 = op2;
+ STRIP_NOPS (arg2);
+ }
+
switch (code)
{
case COMPONENT_REF:
*************** fold_ternary_loc (location_t loc, enum t
*** 13610,13615 ****
--- 13616,13632 ----
return NULL_TREE;
+ case FMA_EXPR:
+ /* For integers we can decompose the FMA if possible. */
+ if (TREE_CODE (arg0) == INTEGER_CST
+ && TREE_CODE (arg1) == INTEGER_CST)
+ return fold_build2_loc (loc, PLUS_EXPR, type,
+ const_binop (MULT_EXPR, arg0, arg1), arg2);
+ if (integer_zerop (arg2))
+ return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
+
+ return fold_fma (loc, type, arg0, arg1, arg2);
+
default:
return NULL_TREE;
} /* switch (code) */
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c.orig 2010-08-18 16:29:14.000000000 +0200
--- gcc/optabs.c 2010-11-03 13:55:11.000000000 +0100
*************** optab_for_tree_code (enum tree_code code
*** 374,379 ****
--- 374,382 ----
: (TYPE_SATURATING (type)
? ssmsub_widen_optab : smsub_widen_optab));
+ case FMA_EXPR:
+ return fma_optab;
+
case REDUC_MAX_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig 2010-11-03 10:55:43.000000000 +0100
--- gcc/tree-vect-stmts.c 2010-11-03 14:00:55.000000000 +0100
*************** vectorizable_shift (gimple stmt, gimple_
*** 2343,2349 ****
/* Function vectorizable_operation.
! Check if STMT performs a binary or unary operation that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
--- 2343,2350 ----
/* Function vectorizable_operation.
! Check if STMT performs a binary, unary or ternary operation that can
! be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
*************** vectorizable_operation (gimple stmt, gim
*** 2354,2360 ****
{
tree vec_dest;
tree scalar_dest;
! tree op0, op1 = NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
--- 2355,2361 ----
{
tree vec_dest;
tree scalar_dest;
! tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
*************** vectorizable_operation (gimple stmt, gim
*** 2366,2372 ****
int icode;
tree def;
gimple def_stmt;
! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info;
int nunits_in;
--- 2367,2374 ----
int icode;
tree def;
gimple def_stmt;
! enum vect_def_type dt[3]
! = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info;
int nunits_in;
*************** vectorizable_operation (gimple stmt, gim
*** 2374,2381 ****
tree vectype_out;
int ncopies;
int j, i;
! VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
! tree vop0, vop1;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
--- 2376,2383 ----
tree vectype_out;
int ncopies;
int j, i;
! VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
! tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
*************** vectorizable_operation (gimple stmt, gim
*** 2401,2410 ****
! /* Support only unary or binary operations. */
op_type = TREE_CODE_LENGTH (code);
! if (op_type != unary_op && op_type != binary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
return false;
}
--- 2403,2413 ----
! /* Support only unary, binary or ternary operations. */
op_type = TREE_CODE_LENGTH (code);
! if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
! op_type);
return false;
}
*************** vectorizable_operation (gimple stmt, gim
*** 2441,2447 ****
if (nunits_out != nunits_in)
return false;
! if (op_type == binary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
--- 2444,2450 ----
if (nunits_out != nunits_in)
return false;
! if (op_type == binary_op || op_type == ternary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
*************** vectorizable_operation (gimple stmt, gim
*** 2452,2457 ****
--- 2455,2471 ----
return false;
}
}
+ if (op_type == ternary_op)
+ {
+ op2 = gimple_assign_rhs3 (stmt);
+ if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt[2]))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
if (loop_vinfo)
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
*************** vectorizable_operation (gimple stmt, gim
*** 2473,2479 ****
|| code == RROTATE_EXPR)
return false;
! optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab)
--- 2487,2493 ----
|| code == RROTATE_EXPR)
return false;
! optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab)
*************** vectorizable_operation (gimple stmt, gim
*** 2534,2541 ****
if (!slp_node)
{
vec_oprnds0 = VEC_alloc (tree, heap, 1);
! if (op_type == binary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
}
/* In case the vectorization factor (VF) is bigger than the number
--- 2548,2557 ----
if (!slp_node)
{
vec_oprnds0 = VEC_alloc (tree, heap, 1);
! if (op_type == binary_op || op_type == ternary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
+ if (op_type == ternary_op)
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
}
/* In case the vectorization factor (VF) is bigger than the number
*************** vectorizable_operation (gimple stmt, gim
*** 2597,2618 ****
/* Handle uses. */
if (j == 0)
{
! if (op_type == binary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
}
else
! vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
{
! vop1 = ((op_type == binary_op)
! ? VEC_index (tree, vec_oprnds1, i) : NULL);
! new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
--- 2613,2652 ----
/* Handle uses. */
if (j == 0)
{
! if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
+ if (op_type == ternary_op)
+ {
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
+ VEC_quick_push (tree, vec_oprnds2,
+ vect_get_vec_def_for_operand (op2, stmt, NULL));
+ }
}
else
! {
! vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
! if (op_type == ternary_op)
! {
! tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
! VEC_quick_push (tree, vec_oprnds2,
! vect_get_vec_def_for_stmt_copy (dt[2],
! vec_oprnd));
! }
! }
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
{
! vop1 = ((op_type == binary_op || op_type == ternary_op)
! ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
! vop2 = ((op_type == ternary_op)
! ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
! new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
! vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
*************** vectorizable_operation (gimple stmt, gim
*** 2633,2638 ****
--- 2667,2674 ----
VEC_free (tree, heap, vec_oprnds0);
if (vec_oprnds1)
VEC_free (tree, heap, vec_oprnds1);
+ if (vec_oprnds2)
+ VEC_free (tree, heap, vec_oprnds2);
return true;
}
Index: gcc/tree.h
===================================================================
*** gcc/tree.h.orig 2010-11-02 11:16:39.000000000 +0100
--- gcc/tree.h 2010-11-03 13:34:12.000000000 +0100
*************** extern void fold_defer_overflow_warnings
*** 4954,4959 ****
--- 4954,4960 ----
extern void fold_undefer_overflow_warnings (bool, const_gimple, int);
extern void fold_undefer_and_ignore_overflow_warnings (void);
extern bool fold_deferring_overflow_warnings_p (void);
+ extern tree fold_fma (location_t, tree, tree, tree, tree);
enum operand_equal_flag
{