This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] FMA on trees


On Tue, 2 Nov 2010, Richard Henderson wrote:

> On 11/02/2010 06:24 AM, Richard Guenther wrote:
> > *************** set_unsafe_math_optimizations_flags (int
> > *** 2289,2294 ****
> > --- 2301,2307 ----
> >     flag_signed_zeros = !set;
> >     flag_associative_math = set;
> >     flag_reciprocal_math = set;
> > +   flag_fp_contract_mode = set ? FP_CONTRACT_FAST : FP_CONTRACT_OFF;
> 
> Off?  Default is FAST anyway.  I can't think what to set here
> for -fno-fast-math, actually.

Yeah, I wondered about that, too.  I've now just removed -ffp-contract
from unsafe-math handling completely.  We can revisit that if we
change the default and/or implement ON.

> > + fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
> > + {
> > +   if (validate_arg (arg0, REAL_TYPE)
> > +       && validate_arg(arg1, REAL_TYPE)
> > +       && validate_arg(arg2, REAL_TYPE))
> > +     {
> > +       if (TREE_CODE (arg0) == REAL_CST
> > + 	  && TREE_CODE (arg1) == REAL_CST
> > + 	  && TREE_CODE (arg2) == REAL_CST)
> > + 	return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
> > + 
> > +       /* ??? Only expand to FMA_EXPR if it's directly supported.  */
> > +       if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
> > +         return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
> 
> I know this is my bit, but I just remembered that this means we
> need a new entry in fold_ternary_loc to fold FMA_EXPR.

Done.

> Finally, if !flag_signed_zeros (part of fast-math), we probably
> want to fold -fma(a,b,c) -> fma(-a,b,c).

Hm, that doesn't sound correct to me ;)

The following also adds vectorization capabilities, which fixes the
observed regressions.

Bootstrap and regtest pending.

Richard.

2010-10-22  Richard Guenther  <rguenther@suse.de>
	Richard Henderson  <rth@redhat.com>

	* tree.def (FMA_EXPR): New tree code.
	* expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
	* gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
	* tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
	* tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
	* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
	* tree-ssa-math-opts.c (convert_mult_to_fma): New function.
	(execute_optimize_widening_mul): Call it.  Reorganize to allow
	dead stmt removal.  Move TODO flags ...
	(pass_optimize_widening_mul): ... here.
	* flag-types.h (enum fp_contract_mode): New enum.
	* common.opt (flag_fp_contract_mode): New variable.
	(-ffp-contract): New option.
	* opts.c (common_handle_option): Handle it.
	* doc/invoke.texi (-ffp-contract): Document.
	* tree.h (fold_fma): Declare.
	* builtins.c (fold_fma): New function.
	(fold_builtin_fma): Likewise.
	(fold_builtin_3): Call it for fma.
	* fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
	* optabs.c (optab_for_tree_code): Handle FMA_EXPR.
	* config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
	New expanders.
	* doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
	named patterns.
	* genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
	* optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
	(fms_optab, fnma_optab, fnms_optab): New defines.
	* gimplify.c (gimplify_expr): Handle binary truth expressions
	explicitly.  Handle FMA_EXPR.
	* tree-vect-stmts.c (vectorizable_operation): Handle ternary
	operations.

	* gcc.target/i386/fma4-vector-2.c: New testcase.

Index: gcc/tree.def
===================================================================
*** gcc/tree.def.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/tree.def	2010-11-03 13:21:19.000000000 +0100
*************** DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "wide
*** 1092,1097 ****
--- 1092,1103 ----
     is subtracted from t3.  */
  DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
  
+ /* Fused multiply-add.
+    All operands and the result are of the same type.  No intermediate
+    rounding is performed after multiplying operand one with operand two
+    before adding operand three.  */
+ DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
+ 
  /* Whole vector left/right shift in bits.
     Operand 0 is a vector to be shifted.
     Operand 1 is an integer shift amount in bits.  */
Index: gcc/expr.c
===================================================================
*** gcc/expr.c.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/expr.c	2010-11-03 13:21:19.000000000 +0100
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7254,7260 ****
    int ignore;
    bool reduce_bit_field;
    location_t loc = ops->location;
!   tree treeop0, treeop1;
  #define REDUCE_BIT_FIELD(expr)	(reduce_bit_field			  \
  				 ? reduce_to_bit_field_precision ((expr), \
  								  target, \
--- 7254,7260 ----
    int ignore;
    bool reduce_bit_field;
    location_t loc = ops->location;
!   tree treeop0, treeop1, treeop2;
  #define REDUCE_BIT_FIELD(expr)	(reduce_bit_field			  \
  				 ? reduce_to_bit_field_precision ((expr), \
  								  target, \
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7267,7272 ****
--- 7267,7273 ----
  
    treeop0 = ops->op0;
    treeop1 = ops->op1;
+   treeop2 = ops->op2;
  
    /* We should be called only on simple (binary or unary) expressions,
       exactly those that are valid in gimple expressions that aren't
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7624,7630 ****
      case WIDEN_MULT_PLUS_EXPR:
      case WIDEN_MULT_MINUS_EXPR:
        expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
!       op2 = expand_normal (ops->op2);
        target = expand_widen_pattern_expr (ops, op0, op1, op2,
  					  target, unsignedp);
        return target;
--- 7625,7631 ----
      case WIDEN_MULT_PLUS_EXPR:
      case WIDEN_MULT_MINUS_EXPR:
        expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
!       op2 = expand_normal (treeop2);
        target = expand_widen_pattern_expr (ops, op0, op1, op2,
  					  target, unsignedp);
        return target;
*************** expand_expr_real_2 (sepops ops, rtx targ
*** 7711,7716 ****
--- 7712,7757 ----
        expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
        return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
  
+     case FMA_EXPR:
+       {
+ 	optab opt = fma_optab;
+ 	gimple def0, def2;
+ 
+ 	def0 = get_def_for_expr (treeop0, NEGATE_EXPR);
+ 	def2 = get_def_for_expr (treeop2, NEGATE_EXPR);
+ 
+ 	op0 = op2 = NULL;
+ 
+ 	if (def0 && def2
+ 	    && optab_handler (fnms_optab, mode) != CODE_FOR_nothing)
+ 	  {
+ 	    opt = fnms_optab;
+ 	    op0 = expand_normal (gimple_assign_rhs1 (def0));
+ 	    op2 = expand_normal (gimple_assign_rhs1 (def2));
+ 	  }
+ 	else if (def0
+ 		 && optab_handler (fnma_optab, mode) != CODE_FOR_nothing)
+ 	  {
+ 	    opt = fnma_optab;
+ 	    op0 = expand_normal (gimple_assign_rhs1 (def0));
+ 	  }
+ 	else if (def2
+ 		 && optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ 	  {
+ 	    opt = fms_optab;
+ 	    op2 = expand_normal (gimple_assign_rhs1 (def2));
+ 	  }
+ 
+ 	if (op0 == NULL)
+ 	  op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL);
+ 	if (op2 == NULL)
+ 	  op2 = expand_normal (treeop2);
+ 	op1 = expand_normal (treeop1);
+ 
+ 	return expand_ternary_op (TYPE_MODE (type), opt,
+ 				  op0, op1, op2, target, 0);
+       }
+ 
      case MULT_EXPR:
        /* If this is a fixed-point operation, then we cannot use the code
  	 below because "expand_mult" doesn't support sat/no-sat fixed-point
Index: gcc/gimple.c
===================================================================
*** gcc/gimple.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/gimple.c	2010-11-03 13:21:19.000000000 +0100
*************** get_gimple_rhs_num_ops (enum tree_code c
*** 2529,2535 ****
        || (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS			    \
     : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS				    \
     : ((SYM) == WIDEN_MULT_PLUS_EXPR					    \
!       || (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS		    \
     : ((SYM) == COND_EXPR						    \
        || (SYM) == CONSTRUCTOR						    \
        || (SYM) == OBJ_TYPE_REF						    \
--- 2529,2536 ----
        || (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS			    \
     : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS				    \
     : ((SYM) == WIDEN_MULT_PLUS_EXPR					    \
!       || (SYM) == WIDEN_MULT_MINUS_EXPR					    \
!       || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS			    \
     : ((SYM) == COND_EXPR						    \
        || (SYM) == CONSTRUCTOR						    \
        || (SYM) == OBJ_TYPE_REF						    \
Index: gcc/tree-cfg.c
===================================================================
*** gcc/tree-cfg.c.orig	2010-11-02 16:37:53.000000000 +0100
--- gcc/tree-cfg.c	2010-11-03 13:21:19.000000000 +0100
*************** verify_gimple_assign_ternary (gimple stm
*** 3748,3753 ****
--- 3748,3767 ----
  	}
        break;
  
+     case FMA_EXPR:
+       if (!useless_type_conversion_p (lhs_type, rhs1_type)
+ 	  || !useless_type_conversion_p (lhs_type, rhs2_type)
+ 	  || !useless_type_conversion_p (lhs_type, rhs3_type))
+ 	{
+ 	  error ("type mismatch in fused multiply-add expression");
+ 	  debug_generic_expr (lhs_type);
+ 	  debug_generic_expr (rhs1_type);
+ 	  debug_generic_expr (rhs2_type);
+ 	  debug_generic_expr (rhs3_type);
+ 	  return true;
+ 	}
+       break;
+ 
      default:
        gcc_unreachable ();
      }
Index: gcc/tree-inline.c
===================================================================
*** gcc/tree-inline.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/tree-inline.c	2010-11-03 13:21:19.000000000 +0100
*************** estimate_operator_cost (enum tree_code c
*** 3283,3288 ****
--- 3283,3289 ----
      case POINTER_PLUS_EXPR:
      case MINUS_EXPR:
      case MULT_EXPR:
+     case FMA_EXPR:
  
      case ADDR_SPACE_CONVERT_EXPR:
      case FIXED_CONVERT_EXPR:
Index: gcc/gimple-pretty-print.c
===================================================================
*** gcc/gimple-pretty-print.c.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/gimple-pretty-print.c	2010-11-03 13:21:19.000000000 +0100
*************** dump_ternary_rhs (pretty_printer *buffer
*** 400,405 ****
--- 400,413 ----
        pp_character (buffer, '>');
        break;
  
+     case FMA_EXPR:
+       dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+       pp_string (buffer, " * ");
+       dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+       pp_string (buffer, " + ");
+       dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+       break;
+ 
      default:
        gcc_unreachable ();
      }
Index: gcc/tree-ssa-math-opts.c
===================================================================
*** gcc/tree-ssa-math-opts.c.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/tree-ssa-math-opts.c	2010-11-03 13:21:19.000000000 +0100
*************** convert_plusminus_to_widen (gimple_stmt_
*** 1494,1499 ****
--- 1494,1616 ----
    return true;
  }
  
+ /* Combine the multiplication at MUL_STMT with uses in additions and
+    subtractions to form fused multiply-add operations.  Returns true
+    if successful and MUL_STMT should be removed.  */
+ 
+ static bool
+ convert_mult_to_fma (gimple mul_stmt)
+ {
+   tree mul_result = gimple_assign_lhs (mul_stmt);
+   tree type = TREE_TYPE (mul_result);
+   gimple use_stmt, fma_stmt;
+   use_operand_p use_p;
+   imm_use_iterator imm_iter;
+ 
+   if (FLOAT_TYPE_P (type)
+       && flag_fp_contract_mode == FP_CONTRACT_OFF)
+     return false;
+ 
+   /* We don't want to do bitfield reduction ops.  */
+   if (INTEGRAL_TYPE_P (type)
+       && (TYPE_PRECISION (type)
+ 	  != GET_MODE_PRECISION (TYPE_MODE (type))))
+     return false;
+ 
+   /* If the target doesn't support it, don't generate it.  We assume that
+      if fma isn't available then fms, fnma or fnms are not either.  */
+   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+     return false;
+ 
+   /* Make sure that the multiplication statement becomes dead after
+      the transformation, thus that all uses are transformed to FMAs.
+      This means we assume that an FMA operation has the same cost
+      as an addition.  */
+   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+     {
+       enum tree_code use_code;
+ 
+       use_stmt = USE_STMT (use_p);
+ 
+       if (!is_gimple_assign (use_stmt))
+ 	return false;
+       use_code = gimple_assign_rhs_code (use_stmt);
+       /* ???  We need to handle NEGATE_EXPR to eventually form fnms.  */
+       if (use_code != PLUS_EXPR
+ 	  && use_code != MINUS_EXPR)
+ 	return false;
+ 
+       /* For now restrict this operation to single basic blocks.  In theory
+ 	 we would want to support sinking the multiplication in
+ 	 m = a*b;
+ 	 if ()
+ 	   ma = m + c;
+ 	 else
+ 	   d = m;
+ 	 to form a fma in the then block and sink the multiplication to the
+ 	 else block.  */
+       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+ 	return false;
+ 
+       /* We can't handle a * b + a * b.  */
+       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+ 	return false;
+ 
+       /* If the target doesn't support a * b - c then drop the ball.  */
+       if (gimple_assign_rhs1 (use_stmt) == mul_result
+ 	  && use_code == MINUS_EXPR
+ 	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ 	return false;
+ 
+       /* If the target doesn't support -a * b + c then drop the ball.  */
+       if (gimple_assign_rhs2 (use_stmt) == mul_result
+ 	  && use_code == MINUS_EXPR
+ 	  && optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ 	return false;
+ 
+       /* We don't generate -a * b - c below yet.  */
+     }
+ 
+   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+     {
+       tree addop, mulop1;
+       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ 
+       mulop1 = gimple_assign_rhs1 (mul_stmt);
+       if (gimple_assign_rhs1 (use_stmt) == mul_result)
+ 	{
+ 	  addop = gimple_assign_rhs2 (use_stmt);
+ 	  /* a * b - c -> a * b + (-c)  */
+ 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ 	    addop = force_gimple_operand_gsi (&gsi,
+ 					      build1 (NEGATE_EXPR,
+ 						      type, addop),
+ 					      true, NULL_TREE, true,
+ 					      GSI_SAME_STMT);
+ 	}
+       else
+ 	{
+ 	  addop = gimple_assign_rhs1 (use_stmt);
+ 	  /* a - b * c -> (-b) * c + a */
+ 	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ 	    mulop1 = force_gimple_operand_gsi (&gsi,
+ 					       build1 (NEGATE_EXPR,
+ 						       type, mulop1),
+ 					       true, NULL_TREE, true,
+ 					       GSI_SAME_STMT);
+ 	}
+ 
+       fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+ 						gimple_assign_lhs (use_stmt),
+ 						mulop1,
+ 						gimple_assign_rhs2 (mul_stmt),
+ 						addop);
+       gsi_replace (&gsi, fma_stmt, true);
+     }
+ 
+   return true;
+ }
+ 
  /* Find integer multiplications where the operands are extended from
     smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
     where appropriate.  */
*************** convert_plusminus_to_widen (gimple_stmt_
*** 1501,1531 ****
  static unsigned int
  execute_optimize_widening_mul (void)
  {
-   bool changed = false;
    basic_block bb;
  
    FOR_EACH_BB (bb)
      {
        gimple_stmt_iterator gsi;
  
!       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          {
  	  gimple stmt = gsi_stmt (gsi);
  	  enum tree_code code;
  
! 	  if (!is_gimple_assign (stmt))
! 	    continue;
! 
! 	  code = gimple_assign_rhs_code (stmt);
! 	  if (code == MULT_EXPR)
! 	    changed |= convert_mult_to_widen (stmt);
! 	  else if (code == PLUS_EXPR || code == MINUS_EXPR)
! 	    changed |= convert_plusminus_to_widen (&gsi, stmt, code);
  	}
      }
  
!   return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
! 	  | TODO_verify_stmts : 0);
  }
  
  static bool
--- 1618,1662 ----
  static unsigned int
  execute_optimize_widening_mul (void)
  {
    basic_block bb;
  
    FOR_EACH_BB (bb)
      {
        gimple_stmt_iterator gsi;
  
!       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
          {
  	  gimple stmt = gsi_stmt (gsi);
  	  enum tree_code code;
  
! 	  if (is_gimple_assign (stmt))
! 	    {
! 	      code = gimple_assign_rhs_code (stmt);
! 	      switch (code)
! 		{
! 		case MULT_EXPR:
! 		  if (!convert_mult_to_widen (stmt)
! 		      && convert_mult_to_fma (stmt))
! 		    {
! 		      gsi_remove (&gsi, true);
! 		      release_defs (stmt);
! 		      continue;
! 		    }
! 		  break;
! 
! 		case PLUS_EXPR:
! 		case MINUS_EXPR:
! 		  convert_plusminus_to_widen (&gsi, stmt, code);
! 		  break;
! 
! 		default:;
! 		}
! 	    }
! 	  gsi_next (&gsi);
  	}
      }
  
!   return 0;
  }
  
  static bool
*************** struct gimple_opt_pass pass_optimize_wid
*** 1549,1554 ****
    0,					/* properties_provided */
    0,					/* properties_destroyed */
    0,					/* todo_flags_start */
!   0                                     /* todo_flags_finish */
   }
  };
--- 1680,1688 ----
    0,					/* properties_provided */
    0,					/* properties_destroyed */
    0,					/* todo_flags_start */
!   TODO_verify_ssa
!   | TODO_verify_stmts
!   | TODO_dump_func
!   | TODO_update_ssa                     /* todo_flags_finish */
   }
  };
Index: gcc/testsuite/gcc.target/i386/fma4-vector-2.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.target/i386/fma4-vector-2.c	2010-11-03 13:21:19.000000000 +0100
***************
*** 0 ****
--- 1,21 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target lp64 } */
+ /* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
+ 
+ float r[256], s[256];
+ float x[256];
+ float y[256];
+ float z[256];
+ 
+ void foo (void)
+ {
+   int i;
+   for (i = 0; i < 256; ++i)
+     {
+       r[i] = x[i] * y[i] - z[i];
+       s[i] = x[i] * y[i] + z[i];
+     }
+ }
+ 
+ /* { dg-final { scan-assembler "vfmaddps" } } */
+ /* { dg-final { scan-assembler "vfmsubps" } } */
Index: gcc/common.opt
===================================================================
*** gcc/common.opt.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/common.opt	2010-11-03 13:21:19.000000000 +0100
*************** bool flag_warn_unused_result = false
*** 58,63 ****
--- 58,67 ----
  Variable
  int *param_values
  
+ ; Floating-point contraction mode, fast by default.
+ Variable
+ enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST
+ 
  ###
  Driver
  
*************** fforward-propagate
*** 857,862 ****
--- 861,870 ----
  Common Report Var(flag_forward_propagate) Optimization
  Perform a forward propagation pass on RTL
  
+ ffp-contract=
+ Common Joined RejectNegative
+ -ffp-contract=[off|on|fast] Perform floating-point expression contraction.
+ 
  ; Nonzero means don't put addresses of constant functions in registers.
  ; Used for compiling the Unix kernel, where strange substitutions are
  ; done on the assembly output.
Index: gcc/doc/invoke.texi
===================================================================
*** gcc/doc/invoke.texi.orig	2010-11-02 16:37:50.000000000 +0100
--- gcc/doc/invoke.texi	2010-11-03 13:21:19.000000000 +0100
*************** Objective-C and Objective-C++ Dialects}.
*** 342,348 ****
  -fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
  -fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
  -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
! -fforward-propagate -ffunction-sections @gol
  -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
  -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
  -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
--- 342,348 ----
  -fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
  -fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
  -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
! -fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
  -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
  -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
  -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
*************** loop unrolling.
*** 5980,5985 ****
--- 5980,5997 ----
  This option is enabled by default at optimization levels @option{-O},
  @option{-O2}, @option{-O3}, @option{-Os}.
  
+ @item -ffp-contract=@var{style}
+ @opindex ffp-contract
+ @option{-ffp-contract=off} disables floating-point expression contraction.
+ @option{-ffp-contract=fast} enables floating-point expression contraction
+ such as forming of fused multiply-add operations if the target has
+ native support for them.
+ @option{-ffp-contract=on} enables floating-point expression contraction
+ if allowed by the language standard.  This is currently not implemented
+ and treated equal to @option{-ffp-contract=off}.
+ 
+ The default is @option{-ffp-contract=fast}.
+ 
  @item -fomit-frame-pointer
  @opindex fomit-frame-pointer
  Don't keep the frame pointer in a register for functions that
*************** an exact implementation of IEEE or ISO r
*** 7816,7822 ****
  math functions. It may, however, yield faster code for programs
  that do not require the guarantees of these specifications.
  Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
! @option{-fassociative-math} and @option{-freciprocal-math}.
  
  The default is @option{-fno-unsafe-math-optimizations}.
  
--- 7828,7835 ----
  math functions. It may, however, yield faster code for programs
  that do not require the guarantees of these specifications.
  Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
! @option{-fassociative-math}, @option{-freciprocal-math} and
! @option{-ffp-contract=fast}.
  
  The default is @option{-fno-unsafe-math-optimizations}.
  
Index: gcc/opts.c
===================================================================
*** gcc/opts.c.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/opts.c	2010-11-03 13:43:06.000000000 +0100
*************** common_handle_option (struct gcc_options
*** 1901,1906 ****
--- 1901,1918 ----
  	return false;
        break;
  
+     case OPT_ffp_contract_:
+       if (!strcmp (arg, "on"))
+ 	/* Not implemented, fall back to conservative FP_CONTRACT_OFF.  */
+ 	flag_fp_contract_mode = FP_CONTRACT_OFF;
+       else if (!strcmp (arg, "off"))
+ 	flag_fp_contract_mode = FP_CONTRACT_OFF;
+       else if (!strcmp (arg, "fast"))
+ 	flag_fp_contract_mode = FP_CONTRACT_FAST;
+       else
+ 	error ("unknown floating point contraction style \"%s\"", arg);
+       break;
+ 
      case OPT_fexcess_precision_:
        if (!strcmp (arg, "fast"))
  	flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/builtins.c	2010-11-03 13:34:38.000000000 +0100
*************** fold_builtin_abs (location_t loc, tree a
*** 9266,9271 ****
--- 9266,9304 ----
    return fold_build1_loc (loc, ABS_EXPR, type, arg);
  }
  
+ /* Fold a fma operation with arguments ARG[012].  */
+ 
+ tree
+ fold_fma (location_t loc, tree type, tree arg0, tree arg1, tree arg2)
+ {
+   if (TREE_CODE (arg0) == REAL_CST
+       && TREE_CODE (arg1) == REAL_CST
+       && TREE_CODE (arg2) == REAL_CST)
+     return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+ 
+   return NULL_TREE;
+ }
+ 
+ /* Fold a call to fma, fmaf, or fmal with arguments ARG[012].  */
+ 
+ static tree
+ fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+ {
+   if (validate_arg (arg0, REAL_TYPE)
+       && validate_arg(arg1, REAL_TYPE)
+       && validate_arg(arg2, REAL_TYPE))
+     {
+       tree tem = fold_fma (loc, type, arg0, arg1, arg2);
+       if (tem)
+ 	return tem;
+ 
+       /* ??? Only expand to FMA_EXPR if it's directly supported.  */
+       if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+         return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+     }
+   return NULL_TREE;
+ }
+ 
  /* Fold a call to builtin fmin or fmax.  */
  
  static tree
*************** fold_builtin_3 (location_t loc, tree fnd
*** 10540,10549 ****
        return fold_builtin_sincos (loc, arg0, arg1, arg2);
  
      CASE_FLT_FN (BUILT_IN_FMA):
!       if (validate_arg (arg0, REAL_TYPE)
! 	  && validate_arg(arg1, REAL_TYPE)
! 	  && validate_arg(arg2, REAL_TYPE))
! 	return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
      break;
  
      CASE_FLT_FN (BUILT_IN_REMQUO):
--- 10573,10579 ----
        return fold_builtin_sincos (loc, arg0, arg1, arg2);
  
      CASE_FLT_FN (BUILT_IN_FMA):
!       return fold_builtin_fma (loc, arg0, arg1, arg2, type);
      break;
  
      CASE_FLT_FN (BUILT_IN_REMQUO):
Index: gcc/config/i386/sse.md
===================================================================
*** gcc/config/i386/sse.md.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/config/i386/sse.md	2010-11-03 13:21:19.000000000 +0100
***************
*** 1859,1865 ****
  
  ;; Intrinsic FMA operations.
  
! ;; The standard name for fma is only available with SSE math enabled.
  (define_expand "fma<mode>4"
    [(set (match_operand:FMAMODE 0 "register_operand")
  	(fma:FMAMODE
--- 1859,1865 ----
  
  ;; Intrinsic FMA operations.
  
! ;; The standard names for fma are only available with SSE math enabled.
  (define_expand "fma<mode>4"
    [(set (match_operand:FMAMODE 0 "register_operand")
  	(fma:FMAMODE
***************
*** 1869,1874 ****
--- 1869,1901 ----
    "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
    "")
  
+ (define_expand "fms<mode>4"
+   [(set (match_operand:FMAMODE 0 "register_operand")
+ 	(fma:FMAMODE
+ 	  (match_operand:FMAMODE 1 "nonimmediate_operand")
+ 	  (match_operand:FMAMODE 2 "nonimmediate_operand")
+ 	  (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+   "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+   "")
+ 
+ (define_expand "fnma<mode>4"
+   [(set (match_operand:FMAMODE 0 "register_operand")
+ 	(fma:FMAMODE
+ 	  (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ 	  (match_operand:FMAMODE 2 "nonimmediate_operand")
+ 	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+   "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+   "")
+ 
+ (define_expand "fnms<mode>4"
+   [(set (match_operand:FMAMODE 0 "register_operand")
+ 	(fma:FMAMODE
+ 	  (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ 	  (match_operand:FMAMODE 2 "nonimmediate_operand")
+ 	  (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+   "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+   "")
+ 
  ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
  (define_expand "fma4i_fmadd_<mode>"
    [(set (match_operand:FMAMODE 0 "register_operand")
Index: gcc/doc/md.texi
===================================================================
*** gcc/doc/md.texi.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/doc/md.texi	2010-11-03 13:21:19.000000000 +0100
*************** pattern is used to implement the @code{f
*** 3958,3963 ****
--- 3958,3993 ----
  multiply followed by the add if the machine does not perform a
  rounding step between the operations.
  
+ @cindex @code{fms@var{m}4} instruction pattern
+ @item @samp{fms@var{m}4}
+ Like @code{fma@var{m}4}, except operand 3 is subtracted from the
+ product instead of added to the product.  This is represented
+ in the rtl as
+ 
+ @smallexample
+ (fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3}))
+ @end smallexample
+ 
+ @cindex @code{fnma@var{m}4} instruction pattern
+ @item @samp{fnma@var{m}4}
+ Like @code{fma@var{m}4} except that the intermediate product
+ is negated before being added to operand 3.  This is represented
+ in the rtl as
+ 
+ @smallexample
+ (fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3})
+ @end smallexample
+ 
+ @cindex @code{fnms@var{m}4} instruction pattern
+ @item @samp{fnms@var{m}4}
+ Like @code{fms@var{m}4} except that the intermediate product
+ is negated before subtracting operand 3.  This is represented
+ in the rtl as
+ 
+ @smallexample
+ (fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3}))
+ @end smallexample
+ 
  @cindex @code{min@var{m}3} instruction pattern
  @cindex @code{max@var{m}3} instruction pattern
  @item @samp{smin@var{m}3}, @samp{smax@var{m}3}
Index: gcc/flag-types.h
===================================================================
*** gcc/flag-types.h.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/flag-types.h	2010-11-03 13:21:19.000000000 +0100
*************** enum warn_strict_overflow_code
*** 152,155 ****
--- 152,162 ----
    WARN_STRICT_OVERFLOW_MAGNITUDE = 5
  };
  
+ /* Floating-point contraction mode.  */
+ enum fp_contract_mode {
+   FP_CONTRACT_OFF = 0,
+   FP_CONTRACT_ON = 1,
+   FP_CONTRACT_FAST = 2
+ };
+ 
  #endif /* ! GCC_FLAG_TYPES_H */
Index: gcc/genopinit.c
===================================================================
*** gcc/genopinit.c.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/genopinit.c	2010-11-03 13:21:19.000000000 +0100
*************** static const char * const optabs[] =
*** 160,165 ****
--- 160,168 ----
    "set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
    "set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
    "set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
+   "set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))",
+   "set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))",
+   "set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))",
    "set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
    "set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
    "set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
Index: gcc/gimplify.c
===================================================================
*** gcc/gimplify.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/gimplify.c	2010-11-03 13:21:19.000000000 +0100
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7170,7175 ****
--- 7170,7185 ----
  	  ret = gimplify_omp_atomic (expr_p, pre_p);
  	  break;
  
+ 	case TRUTH_AND_EXPR:
+ 	case TRUTH_OR_EXPR:
+ 	case TRUTH_XOR_EXPR:
+ 	  /* Classified as tcc_expression.  */
+ 	  goto expr_2;
+ 
+ 	case FMA_EXPR:
+ 	  /* Classified as tcc_expression.  */
+ 	  goto expr_3;
+ 
  	case POINTER_PLUS_EXPR:
            /* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
  	     The second is gimple immediate saving a need for extra statement.
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7249,7264 ****
  		break;
  	      }
  
  	    case tcc_declaration:
  	    case tcc_constant:
  	      ret = GS_ALL_DONE;
  	      goto dont_recalculate;
  
  	    default:
! 	      gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
! 			  || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
! 			  || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
! 	      goto expr_2;
  	    }
  
  	  recalculate_side_effects (*expr_p);
--- 7259,7286 ----
  		break;
  	      }
  
+ 	    expr_3:
+ 	      {
+ 		enum gimplify_status r0, r1, r2;
+ 
+ 		r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ 		                    post_p, is_gimple_val, fb_rvalue);
+ 		r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+ 				    post_p, is_gimple_val, fb_rvalue);
+ 		r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+ 				    post_p, is_gimple_val, fb_rvalue);
+ 
+ 		ret = MIN (MIN (r0, r1), r2);
+ 		break;
+ 	      }
+ 
  	    case tcc_declaration:
  	    case tcc_constant:
  	      ret = GS_ALL_DONE;
  	      goto dont_recalculate;
  
  	    default:
! 	      gcc_unreachable ();
  	    }
  
  	  recalculate_side_effects (*expr_p);
Index: gcc/optabs.h
===================================================================
*** gcc/optabs.h.orig	2010-11-02 16:37:35.000000000 +0100
--- gcc/optabs.h	2010-11-03 13:21:19.000000000 +0100
*************** enum optab_index
*** 192,197 ****
--- 192,200 ----
    OTI_atan2,
    /* Floating multiply/add */
    OTI_fma,
+   OTI_fms,
+   OTI_fnma,
+   OTI_fnms,
  
    /* Move instruction.  */
    OTI_mov,
*************** enum optab_index
*** 435,440 ****
--- 438,446 ----
  #define pow_optab (&optab_table[OTI_pow])
  #define atan2_optab (&optab_table[OTI_atan2])
  #define fma_optab (&optab_table[OTI_fma])
+ #define fms_optab (&optab_table[OTI_fms])
+ #define fnma_optab (&optab_table[OTI_fnma])
+ #define fnms_optab (&optab_table[OTI_fnms])
  
  #define mov_optab (&optab_table[OTI_mov])
  #define movstrict_optab (&optab_table[OTI_movstrict])
Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/fold-const.c	2010-11-03 13:50:07.000000000 +0100
*************** contains_label_p (tree st)
*** 13281,13290 ****
  
  tree
  fold_ternary_loc (location_t loc, enum tree_code code, tree type,
! 	      tree op0, tree op1, tree op2)
  {
    tree tem;
!   tree arg0 = NULL_TREE, arg1 = NULL_TREE;
    enum tree_code_class kind = TREE_CODE_CLASS (code);
  
    gcc_assert (IS_EXPR_CODE_CLASS (kind)
--- 13281,13290 ----
  
  tree
  fold_ternary_loc (location_t loc, enum tree_code code, tree type,
! 		  tree op0, tree op1, tree op2)
  {
    tree tem;
!   tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE;
    enum tree_code_class kind = TREE_CODE_CLASS (code);
  
    gcc_assert (IS_EXPR_CODE_CLASS (kind)
*************** fold_ternary_loc (location_t loc, enum t
*** 13312,13317 ****
--- 13312,13323 ----
        STRIP_NOPS (arg1);
      }
  
+   if (op2)
+     {
+       arg2 = op2;
+       STRIP_NOPS (arg2);
+     }
+ 
    switch (code)
      {
      case COMPONENT_REF:
*************** fold_ternary_loc (location_t loc, enum t
*** 13610,13615 ****
--- 13616,13632 ----
  
        return NULL_TREE;
  
+     case FMA_EXPR:
+       /* For integer constants fold the product and decompose the FMA.  */
+       if (TREE_CODE (arg0) == INTEGER_CST
+ 	  && TREE_CODE (arg1) == INTEGER_CST)
+ 	return fold_build2_loc (loc, PLUS_EXPR, type,
+ 				const_binop (MULT_EXPR, arg0, arg1), arg2);
+       if (integer_zerop (arg2))
+ 	return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
+ 
+       return fold_fma (loc, type, arg0, arg1, arg2);
+ 
      default:
        return NULL_TREE;
      } /* switch (code) */
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c.orig	2010-08-18 16:29:14.000000000 +0200
--- gcc/optabs.c	2010-11-03 13:55:11.000000000 +0100
*************** optab_for_tree_code (enum tree_code code
*** 374,379 ****
--- 374,382 ----
  	      : (TYPE_SATURATING (type)
  		 ? ssmsub_widen_optab : smsub_widen_optab));
  
+     case FMA_EXPR:
+       return fma_optab;
+ 
      case REDUC_MAX_EXPR:
        return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
  
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig	2010-11-03 10:55:43.000000000 +0100
--- gcc/tree-vect-stmts.c	2010-11-03 14:00:55.000000000 +0100
*************** vectorizable_shift (gimple stmt, gimple_
*** 2343,2349 ****
  
  /* Function vectorizable_operation.
  
!    Check if STMT performs a binary or unary operation that can be vectorized.
     If VEC_STMT is also passed, vectorize the STMT: create a vectorized
     stmt to replace it, put it in VEC_STMT, and insert it at BSI.
     Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
--- 2343,2350 ----
  
  /* Function vectorizable_operation.
  
!    Check if STMT performs a binary, unary or ternary operation that can
!    be vectorized.
     If VEC_STMT is also passed, vectorize the STMT: create a vectorized
     stmt to replace it, put it in VEC_STMT, and insert it at BSI.
     Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
*************** vectorizable_operation (gimple stmt, gim
*** 2354,2360 ****
  {
    tree vec_dest;
    tree scalar_dest;
!   tree op0, op1 = NULL;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    tree vectype;
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
--- 2355,2361 ----
  {
    tree vec_dest;
    tree scalar_dest;
!   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    tree vectype;
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
*************** vectorizable_operation (gimple stmt, gim
*** 2366,2372 ****
    int icode;
    tree def;
    gimple def_stmt;
!   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
    gimple new_stmt = NULL;
    stmt_vec_info prev_stmt_info;
    int nunits_in;
--- 2367,2374 ----
    int icode;
    tree def;
    gimple def_stmt;
!   enum vect_def_type dt[3]
!     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
    gimple new_stmt = NULL;
    stmt_vec_info prev_stmt_info;
    int nunits_in;
*************** vectorizable_operation (gimple stmt, gim
*** 2374,2381 ****
    tree vectype_out;
    int ncopies;
    int j, i;
!   VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
!   tree vop0, vop1;
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
    int vf;
  
--- 2376,2383 ----
    tree vectype_out;
    int ncopies;
    int j, i;
!   VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
!   tree vop0, vop1, vop2;
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
    int vf;
  
*************** vectorizable_operation (gimple stmt, gim
*** 2401,2410 ****
  
    /* Support only unary or binary operations.  */
    op_type = TREE_CODE_LENGTH (code);
!   if (op_type != unary_op && op_type != binary_op)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
! 	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
        return false;
      }
  
--- 2403,2413 ----
  
    /* Support only unary or binary operations.  */
    op_type = TREE_CODE_LENGTH (code);
!   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
! 	fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
! 		 op_type);
        return false;
      }
  
*************** vectorizable_operation (gimple stmt, gim
*** 2441,2447 ****
    if (nunits_out != nunits_in)
      return false;
  
!   if (op_type == binary_op)
      {
        op1 = gimple_assign_rhs2 (stmt);
        if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
--- 2444,2450 ----
    if (nunits_out != nunits_in)
      return false;
  
!   if (op_type == binary_op || op_type == ternary_op)
      {
        op1 = gimple_assign_rhs2 (stmt);
        if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
*************** vectorizable_operation (gimple stmt, gim
*** 2452,2457 ****
--- 2455,2471 ----
  	  return false;
  	}
      }
+   if (op_type == ternary_op)
+     {
+       op2 = gimple_assign_rhs3 (stmt);
+       if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
+                                &dt[2]))
+ 	{
+ 	  if (vect_print_dump_info (REPORT_DETAILS))
+ 	    fprintf (vect_dump, "use not simple.");
+ 	  return false;
+ 	}
+     }
  
    if (loop_vinfo)
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
*************** vectorizable_operation (gimple stmt, gim
*** 2473,2479 ****
        || code == RROTATE_EXPR)
     return false;
  
!  optab = optab_for_tree_code (code, vectype, optab_default);
  
    /* Supportable by target?  */
    if (!optab)
--- 2487,2493 ----
        || code == RROTATE_EXPR)
     return false;
  
!   optab = optab_for_tree_code (code, vectype, optab_default);
  
    /* Supportable by target?  */
    if (!optab)
*************** vectorizable_operation (gimple stmt, gim
*** 2534,2541 ****
    if (!slp_node)
      {
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
!       if (op_type == binary_op)
          vec_oprnds1 = VEC_alloc (tree, heap, 1);
      }
  
    /* In case the vectorization factor (VF) is bigger than the number
--- 2548,2557 ----
    if (!slp_node)
      {
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
!       if (op_type == binary_op || op_type == ternary_op)
          vec_oprnds1 = VEC_alloc (tree, heap, 1);
+       if (op_type == ternary_op)
+         vec_oprnds2 = VEC_alloc (tree, heap, 1);
      }
  
    /* In case the vectorization factor (VF) is bigger than the number
*************** vectorizable_operation (gimple stmt, gim
*** 2597,2618 ****
        /* Handle uses.  */
        if (j == 0)
  	{
! 	  if (op_type == binary_op)
  	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
  			       slp_node);
  	  else
  	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
  			       slp_node);
  	}
        else
! 	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
  
        /* Arguments are ready.  Create the new vector stmt.  */
        FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
          {
! 	  vop1 = ((op_type == binary_op)
! 		  ? VEC_index (tree, vec_oprnds1, i) : NULL);
! 	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
  	  new_temp = make_ssa_name (vec_dest, new_stmt);
  	  gimple_assign_set_lhs (new_stmt, new_temp);
  	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
--- 2613,2652 ----
        /* Handle uses.  */
        if (j == 0)
  	{
! 	  if (op_type == binary_op || op_type == ternary_op)
  	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
  			       slp_node);
  	  else
  	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
  			       slp_node);
+ 	  if (op_type == ternary_op)
+ 	    {
+ 	      vec_oprnds2 = VEC_alloc (tree, heap, 1);
+ 	      VEC_quick_push (tree, vec_oprnds2,
+ 			      vect_get_vec_def_for_operand (op2, stmt, NULL));
+ 	    }
  	}
        else
! 	{
! 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
! 	  if (op_type == ternary_op)
! 	    {
! 	      tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
! 	      VEC_quick_push (tree, vec_oprnds2,
! 			      vect_get_vec_def_for_stmt_copy (dt[2],
! 							      vec_oprnd));
! 	    }
! 	}
  
        /* Arguments are ready.  Create the new vector stmt.  */
        FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
          {
! 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
! 		  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
! 	  vop2 = ((op_type == ternary_op)
! 		  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
! 	  new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
! 						    vop0, vop1, vop2);
  	  new_temp = make_ssa_name (vec_dest, new_stmt);
  	  gimple_assign_set_lhs (new_stmt, new_temp);
  	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
*************** vectorizable_operation (gimple stmt, gim
*** 2633,2638 ****
--- 2667,2674 ----
    VEC_free (tree, heap, vec_oprnds0);
    if (vec_oprnds1)
      VEC_free (tree, heap, vec_oprnds1);
+   if (vec_oprnds2)
+     VEC_free (tree, heap, vec_oprnds2);
  
    return true;
  }
Index: gcc/tree.h
===================================================================
*** gcc/tree.h.orig	2010-11-02 11:16:39.000000000 +0100
--- gcc/tree.h	2010-11-03 13:34:12.000000000 +0100
*************** extern void fold_defer_overflow_warnings
*** 4954,4959 ****
--- 4954,4960 ----
  extern void fold_undefer_overflow_warnings (bool, const_gimple, int);
  extern void fold_undefer_and_ignore_overflow_warnings (void);
  extern bool fold_deferring_overflow_warnings_p (void);
+ extern tree fold_fma (location_t, tree, tree, tree, tree);
  
  enum operand_equal_flag
  {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]