[PATCH i386 4/8] [AVX512] [5/8] Add substed patterns: rounding subst.

Kirill Yukhin kirill.yukhin@gmail.com
Wed Nov 6 07:24:00 GMT 2013


Hello,
On 14 Aug 11:44, Kirill Yukhin wrote:
> Rounding is implemented by adding a const_int value in parallel with the
> original pattern - however, that might be not the best approach.  define_subst
> for this transformation are quite straightforward - we needed three of them to
> cover all patterns we need to transform.  The most interesting part here is how
> to determine which operand corresponds to the rounding immediate. To somehow
> reflect this in the pattern, we allowed using attributes in other attributes,
> i.e. attributes nesting (that's already in the trunk). The problem here arises
> from the next: when subst for rounding is applied to a pattern after subst for
> masking, it's actually applied to two patterns with different number of
> operands. To find that number, we wrote next attributes: (define_subst_attr
> "round_mask_operand2" "mask" "%R2" "%R4") (define_subst_attr "round_mask_op2"
> "round" "" "<round_mask_operand2>") In result, we get either empty string (if
> rounding-subst isn't applied), or "%R2" or "%R4" depending on whether mask-subst
> was applied.

Corrsponding subst implementation is in the bottom.

Bootstrapped.

Is it ok for trunk?

--
Thanks, K

---
 gcc/config/i386/i386.c   |  32 +++
 gcc/config/i386/i386.md  |  10 +
 gcc/config/i386/sse.md   | 642 +++++++++++++++++++++++++----------------------
 gcc/config/i386/subst.md |  41 +++
 4 files changed, 426 insertions(+), 299 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a69e057..e172adf 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14839,6 +14839,38 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	    fputs ("{z}", file);
 	  return;
 
+	case 'R':
+	  gcc_assert (CONST_INT_P (x));
+
+	  if (ASSEMBLER_DIALECT == ASM_INTEL)
+	    fputs (", ", file);
+
+	  switch (INTVAL (x))
+	    {
+	    case ROUND_NEAREST_INT:
+	      fputs ("{rn-sae}", file);
+	      break;
+	    case ROUND_NEG_INF:
+	      fputs ("{rd-sae}", file);
+	      break;
+	    case ROUND_POS_INF:
+	      fputs ("{ru-sae}", file);
+	      break;
+	    case ROUND_ZERO:
+	      fputs ("{rz-sae}", file);
+	      break;
+	    case ROUND_SAE:
+	      fputs ("{sae}", file);
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+
+	  if (ASSEMBLER_DIALECT == ASM_ATT)
+	    fputs (", ", file);
+
+	  return;
+
 	case '*':
 	  if (ASSEMBLER_DIALECT == ASM_ATT)
 	    putc ('*', file);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c7ec0c1..344702b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -246,6 +246,16 @@
    (ROUND_NO_EXC		0x8)
   ])
 
+;; Constants to represent AVX512F embeded rounding
+(define_constants
+  [(ROUND_NEAREST_INT			0)
+   (ROUND_NEG_INF			1)
+   (ROUND_POS_INF			2)
+   (ROUND_ZERO				3)
+   (NO_ROUND				4)
+   (ROUND_SAE				5)
+  ])
+
 ;; Constants to represent pcomtrue/pcomfalse variants
 (define_constants
   [(PCOM_FALSE			0)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6b41060..61b96a1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1285,80 +1285,80 @@
 }
   [(set_attr "isa" "noavx,noavx,avx,avx")])
 
-(define_expand "<plusminus_insn><mode>3<mask_name>"
+(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand")
 	(plusminus:VF
 	  (match_operand:VF 1 "nonimmediate_operand")
 	  (match_operand:VF 2 "nonimmediate_operand")))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
+  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
-(define_insn "*<plusminus_insn><mode>3<mask_name>"
+(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=x,v")
 	(plusminus:VF
 	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
+	  (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
    <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
-   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (plusminus:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
+   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "<mask_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
-(define_expand "mul<mode>3<mask_name>"
+(define_expand "mul<mode>3<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand")
 	(mult:VF
 	  (match_operand:VF 1 "nonimmediate_operand")
 	  (match_operand:VF 2 "nonimmediate_operand")))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
+  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
-(define_insn "*mul<mode>3<mask_name>"
+(define_insn "*mul<mode>3<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=x,v")
 	(mult:VF
 	  (match_operand:VF 1 "nonimmediate_operand" "%0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
+	  (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
    mul<ssemodesuffix>\t{%2, %0|%0, %2}
-   vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "ssemul")
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (multdiv:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
+   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse<multdiv_mnemonic>")
    (set_attr "prefix" "<mask_scalar_prefix>")
@@ -1391,15 +1391,15 @@
     }
 })
 
-(define_insn "<sse>_div<mode>3<mask_name>"
+(define_insn "<sse>_div<mode>3<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=x,v")
 	(div:VF
 	  (match_operand:VF 1 "register_operand" "0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
+	  (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
    div<ssemodesuffix>\t{%2, %0|%0, %2}
-   vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "ssediv")
    (set_attr "prefix" "<mask_prefix3>")
@@ -1482,28 +1482,28 @@
     }
 })
 
-(define_insn "<sse>_sqrt<mode>2<mask_name>"
+(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=v")
-	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
-  "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "<round_constraint>")))]
+  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+  "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "sse")
    (set_attr "atom_sse_attr" "sqrt")
    (set_attr "btver2_sse_attr" "sqrt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name>"
+(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (sqrt:VF_128
-	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
+	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
 	  (match_operand:VF_128 2 "register_operand" "0,v")
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
-   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1}"
+   vsqrt<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_mask_scalar_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "atom_sse_attr" "sqrt")
@@ -2752,210 +2752,224 @@
 	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
   "")
 
-(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>"
+(define_expand "avx512f_fmadd_<mode>_maskz"
+  [(match_operand:VF_512 0 "register_operand")
+   (match_operand:VF_512 1 "nonimmediate_operand")
+   (match_operand:VF_512 2 "nonimmediate_operand")
+   (match_operand:VF_512 3 "nonimmediate_operand")
+   (match_operand:<avx512fmaskmode> 4 "register_operand")]
+  "TARGET_AVX512F"
+{
+  emit_insn (gen_fma_fmadd_<mode>_maskz_1 (
+    operands[0], operands[1], operands[2], operands[3],
+    CONST0_RTX (<MODE>mode), operands[4]));
+  DONE;
+})
+
+(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
-	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
-	  (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
-  "<sd_mask_mode512bit_condition>"
-  "@
-   vfmadd132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfmadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfmadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
+	  (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+  "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+  "@
+   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmadd_<mode>_mask"
+(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (match_operand:VF_512 1 "register_operand" "0,0")
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
-	    (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+	    (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmadd_<mode>_mask3"
+(define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=x")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (match_operand:VF_512 1 "register_operand" "x")
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	    (match_operand:VF_512 3 "register_operand" "0"))
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>"
+(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0, v, x,x")
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
 	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "nonimmediate_operand" "" v,vm, 0,xm,x"))))]
-  "<sd_mask_mode512bit_condition>"
+	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+  "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
-   vfmsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfmsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfmsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmsub_<mode>_mask"
+(define_insn "avx512f_fmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (match_operand:VF_512 1 "register_operand" "0,0")
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
 	    (neg:VF_512
-	      (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
+	      (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmsub_<mode>_mask3"
+(define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (match_operand:VF_512 1 "register_operand" "v")
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	    (neg:VF_512
 	      (match_operand:VF_512 3 "register_operand" "0")))
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>"
+(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
-	  (match_operand:FMAMODE   3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
-  "<sd_mask_mode512bit_condition>"
-  "@
-   vfnmadd132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfnmadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfnmadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+	  (match_operand:FMAMODE   3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+  "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+  "@
+   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fnmadd_<mode>_mask"
+(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (neg:VF_512
 	      (match_operand:VF_512 1 "register_operand" "0,0"))
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
-	    (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+	    (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fnmadd_<mode>_mask3"
+(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (neg:VF_512
 	      (match_operand:VF_512 1 "register_operand" "v"))
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	    (match_operand:VF_512 3 "register_operand" "0"))
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>"
+(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
+	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
 	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
-  "<sd_mask_mode512bit_condition>"
+	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+  "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
-   vfnmsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfnmsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfnmsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fnmsub_<mode>_mask"
+(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (neg:VF_512
 	      (match_operand:VF_512 1 "register_operand" "0,0"))
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
 	    (neg:VF_512
-	      (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
+	      (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fnmsub_<mode>_mask3"
+(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(vec_merge:VF_512
 	  (fma:VF_512
 	    (neg:VF_512
 	      (match_operand:VF_512 1 "register_operand" "v"))
-	    (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	    (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	    (neg:VF_512
 	      (match_operand:VF_512 3 "register_operand" "0")))
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -2994,109 +3008,109 @@
   DONE;
 })
 
-(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name>"
+(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
 	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
-	   (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
-	   (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
+	  [(match_operand:VF 1 "nonimmediate_operand" "%0,0,v,x,x")
+	   (match_operand:VF 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+	   (match_operand:VF 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")]
 	  UNSPEC_FMADDSUB))]
-  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition>"
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
-   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmaddsub_<mode>_mask"
+(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (unspec:VF_512
 	    [(match_operand:VF_512 1 "register_operand" "0,0")
-	     (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
-	     (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")]
+	     (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+	     (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")]
 	    UNSPEC_FMADDSUB)
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmaddsub_<mode>_mask3"
+(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(vec_merge:VF_512
 	  (unspec:VF_512
 	    [(match_operand:VF_512 1 "register_operand" "v")
-	     (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	     (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	     (match_operand:VF_512 3 "register_operand" "0")]
 	    UNSPEC_FMADDSUB)
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name>"
+(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
   [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
 	(unspec:VF
-	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0, v, x,x")
-	   (match_operand:VF   2 "nonimmediate_operand" "vm, v,vm, x,m")
+	  [(match_operand:VF   1 "nonimmediate_operand" "%0,0,v,x,x")
+	   (match_operand:VF   2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
 	   (neg:VF
-	     (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
+	     (match_operand:VF 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))]
 	  UNSPEC_FMADDSUB))]
-  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition>"
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
-   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
-   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
-   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}
+   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
    vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmsubadd_<mode>_mask"
+(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
 	  (unspec:VF_512
 	    [(match_operand:VF_512 1 "register_operand" "0,0")
-	     (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
+	     (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
 	     (neg:VF_512
-	       (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))]
+	       (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))]
 	    UNSPEC_FMADDSUB)
 	  (match_dup 1)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
   "TARGET_AVX512F"
   "@
-   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
-   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "isa" "fma_avx512f,fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_fmsubadd_<mode>_mask3"
+(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(vec_merge:VF_512
 	  (unspec:VF_512
 	    [(match_operand:VF_512 1 "register_operand" "v")
-	     (match_operand:VF_512 2 "nonimmediate_operand" "vm")
+	     (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
 	     (neg:VF_512
 	       (match_operand:VF_512 3 "register_operand" "0"))]
 	    UNSPEC_FMADDSUB)
 	  (match_dup 3)
 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
   "TARGET_AVX512F"
-  "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "isa" "fma_avx512f")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -3104,7 +3118,22 @@
 ;; FMA3 floating point scalar intrinsics. These merge result with
 ;; high-order elements from the destination register.
 
-(define_expand "fmai_vmfmadd_<mode>"
+(define_expand "fmai_vmfmadd_<mode>_maskz<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand")
+	      (match_operand:VF_128 2 "nonimmediate_operand")
+	      (match_operand:VF_128 3 "nonimmediate_operand"))
+	    (match_dup <round_opnum>)
+	    (match_operand:QI 4 "register_operand"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "operands[<round_opnum>] = CONST0_RTX (<MODE>mode);")
+
+(define_expand "fmai_vmfmadd_<mode><round_name>"
   [(set (match_operand:VF_128 0 "register_operand")
 	(vec_merge:VF_128
 	  (fma:VF_128
@@ -3115,207 +3144,222 @@
 	  (const_int 1)))]
   "TARGET_FMA")
 
-(define_insn "*fmai_fmadd_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
-        (vec_merge:VF_128
-	  (fma:VF_128
-	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
-	    (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
-	    (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_FMA || TARGET_AVX512F"
-  "@
-   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
-   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fmai_fmsub_<mode>_mask"
+(define_insn "fmai_vmfmadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
-          (neg:VF_128
-	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>, v")
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
 	    (match_dup 1)
 	    (match_operand:QI 4 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+   vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+   vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fmai_vmfmsub_<mode>_mask3"
+(define_insn "fmai_vmfmadd_<mode>_mask3<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (match_operand:VF_128 1 "nonimmediate_operand" "%v")
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
-          (neg:VF_128
-	        (match_operand:VF_128 3 "register_operand" "0")))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+	      (match_operand:VF_128 3 "register_operand" "0"))
 	    (match_dup 3)
 	    (match_operand:QI 4 "register_operand" "k"))
 	  (match_dup 3)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fmsub_<mode>_maskz"
+(define_insn "*fmai_fmadd_<mode>_maskz<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
-          (neg:VF_128
-	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
 	    (match_operand:VF_128 4 "const0_operand")
 	    (match_operand:QI 5 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+   vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %3, %2<round_op6>}
+   vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3<round_op6>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmadd_<mode><round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+        (vec_merge:VF_128
+	  (fma:VF_128
+	    (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	    (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+	    (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_FMA || TARGET_AVX512F"
+  "@
+   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fmsub_<mode>_mask"
+(define_insn "*fmai_fmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
           (neg:VF_128
-	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
 	    (match_dup 1)
 	    (match_operand:QI 4 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+   vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmsub_<mode>_mask3<round_name>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "register_operand" "0")))
+	    (match_dup 3)
+	    (match_operand:QI 4 "register_operand" "k"))
+	  (match_dup 3)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fmsub_<mode>_maskz"
+(define_insn "*fmai_fmsub_<mode>_maskz<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
           (neg:VF_128
-	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
 	    (match_operand:VF_128 4 "const0_operand")
 	    (match_operand:QI 5 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+   vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+   vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_vmfnmadd_<mode>_mask"
+(define_insn "*fmai_vmfnmadd_<mode>_mask<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
-		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+		(match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
-	      (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
 	    (match_dup 1)
 	    (match_operand:QI 4 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
-   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+   vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_vmfnmadd_<mode>_mask3"
+(define_insn "*fmai_vmfnmadd_<mode>_mask3<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
 		(match_operand:VF_128 1 "nonimmediate_operand" "%v"))
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
 	      (match_operand:VF_128 3 "register_operand" "0"))
 	    (match_dup 3)
 	    (match_operand:QI 4 "register_operand" "k"))
 	  (match_dup 3)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_vmfnmadd_<mode>_maskz"
+(define_insn "*fmai_vmfnmadd_<mode>_maskz<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
-		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+		(match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
-	      (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
 	    (match_operand:VF_128 4 "const0_operand")
 	    (match_operand:QI 5 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
-   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+   vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+   vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fnmsub_<mode>_mask"
+(define_insn "*fmai_fnmsub_<mode>_mask<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
-		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+		(match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
 	      (neg:VF_128
-		(match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+		(match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
 	    (match_dup 1)
 	    (match_operand:QI 4 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
-   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+   vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+   vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fnmsub_<mode>_mask3"
+(define_insn "*fmai_fnmsub_<mode>_mask3<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
 		(match_operand:VF_128 1 "nonimmediate_operand" "%v"))
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
 	      (neg:VF_128
 	        (match_operand:VF_128 3 "register_operand" "0")))
 	    (match_dup 3)
@@ -3323,80 +3367,80 @@
 	  (match_dup 3)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fnmsub_<mode>_maskz"
+(define_insn "*fmai_fnmsub_<mode>_maskz<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
 	(vec_merge:VF_128
 	  (vec_merge:VF_128
 	    (fma:VF_128
 	      (neg:VF_128
-		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+		(match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
 	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
 	      (neg:VF_128
-	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
 	    (match_operand:VF_128 4 "const0_operand")
 	    (match_operand:QI 5 "register_operand" "k,k"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "@
-   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
-   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+   vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+   vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fmsub_<mode>"
+(define_insn "*fmai_fmsub_<mode><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
 	  (fma:VF_128
-	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
-	    (match_operand:VF_128   2 "nonimmediate_operand" "vm, v")
+	    (match_operand:VF_128   1 "nonimmediate_operand" "0,0")
+	    (match_operand:VF_128   2 "nonimmediate_operand" "<round_constraint>,v")
 	    (neg:VF_128
-	      (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
+	      (match_operand:VF_128 3 "nonimmediate_operand" " v,<round_constraint>")))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
   "@
-   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fnmadd_<mode>"
+(define_insn "*fmai_fnmadd_<mode><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
 	  (fma:VF_128
 	    (neg:VF_128
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
-	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
-	    (match_operand:VF_128   3 "nonimmediate_operand" " v,vm"))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+	    (match_operand:VF_128   1 "nonimmediate_operand" "0,0")
+	    (match_operand:VF_128   3 "nonimmediate_operand" "v,<round_constraint>"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
   "@
-   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
-   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fmai_fnmsub_<mode>"
+(define_insn "*fmai_fnmsub_<mode><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
 	  (fma:VF_128
 	    (neg:VF_128
-	      (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>, v"))
 	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
 	    (neg:VF_128
-	      (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
+	      (match_operand:VF_128 3 "nonimmediate_operand" " v,<round_constraint>")))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA || TARGET_AVX512F"
   "@
-   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
-   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
@@ -3517,18 +3561,18 @@
    (set_attr "prefix_rep" "0")
    (set_attr "mode" "SF")])
 
-(define_insn "sse_cvtsi2ss"
+(define_insn "sse_cvtsi2ss<round_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
-	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
+	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
 	  (match_operand:V4SF 1 "register_operand" "0,0,v")
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
    cvtsi2ss\t{%2, %0|%0, %2}
    cvtsi2ss\t{%2, %0|%0, %2}
-   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+   vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "vector,double,*")
@@ -3538,18 +3582,18 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "SF")])
 
-(define_insn "sse_cvtsi2ssq"
+(define_insn "sse_cvtsi2ssq<round_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
-	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
+	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
 	  (match_operand:V4SF 1 "register_operand" "0,0,v")
 	  (const_int 1)))]
   "TARGET_SSE && TARGET_64BIT"
   "@
    cvtsi2ssq\t{%2, %0|%0, %2}
    cvtsi2ssq\t{%2, %0|%0, %2}
-   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+   vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "vector,double,*")
@@ -3561,15 +3605,15 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "SF")])
 
-(define_insn "sse_cvtss2si"
+(define_insn "sse_cvtss2si<round_name>"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(unspec:SI
 	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+	     (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE"
-  "%vcvtss2si\t{%1, %0|%0, %k1}"
+  "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "bdver1_decode" "double,double")
@@ -3591,15 +3635,15 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
-(define_insn "sse_cvtss2siq"
+(define_insn "sse_cvtss2siq<round_name>"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(unspec:DI
 	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+	     (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE && TARGET_64BIT"
-  "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
+  "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "bdver1_decode" "double,double")
@@ -3653,50 +3697,50 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DI")])
 
-(define_insn "cvtusi2<ssescalarmodesuffix>32"
+(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (vec_duplicate:VF_128
 	    (unsigned_float:<ssescalarmode>
-	      (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	      (match_operand:SI 2 "nonimmediate_operand" "<round_constraint3>")))
 	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
-  "TARGET_AVX512F"
-  "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "TARGET_AVX512F && <round_modev4sf_condition>"
+  "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<ssescalarmode>")])
 
-(define_insn "cvtusi2<ssescalarmodesuffix>64"
+(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (vec_duplicate:VF_128
 	    (unsigned_float:<ssescalarmode>
-	      (match_operand:DI 2 "nonimmediate_operand" "rm")))
+	      (match_operand:DI 2 "nonimmediate_operand" "<round_constraint3>")))
 	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512F && TARGET_64BIT"
-  "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<ssescalarmode>")])
 
-(define_insn "float<sseintvecmodelower><mode>2<mask_name>"
+(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
   [(set (match_operand:VF1 0 "register_operand" "=v")
 	(float:VF1
-	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
-  "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
+  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+  "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "ufloatv16siv16sf2<mask_name>"
+(define_insn "ufloatv16siv16sf2<mask_name><round_name>"
   [(set (match_operand:V16SF 0 "register_operand" "=v")
 	(unsigned_float:V16SF
-	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
+	  (match_operand:V16SI 1 "nonimmediate_operand" "<round_constraint>")))]
   "TARGET_AVX512F"
-  "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "V16SF")])
@@ -3731,24 +3775,24 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>"
+(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
   [(set (match_operand:V16SI 0 "register_operand" "=v")
 	(unspec:V16SI
-	  [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>"
+(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
   [(set (match_operand:V16SI 0 "register_operand" "=v")
 	(unspec:V16SI
-	  [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
@@ -3866,18 +3910,18 @@
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "DF")])
 
-(define_insn "sse2_cvtsi2sdq"
+(define_insn "sse2_cvtsi2sdq<round_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
 	(vec_merge:V2DF
 	  (vec_duplicate:V2DF
-	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
+	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
 	  (match_operand:V2DF 1 "register_operand" "0,0,v")
 	  (const_int 1)))]
   "TARGET_SSE2 && TARGET_64BIT"
   "@
    cvtsi2sdq\t{%2, %0|%0, %2}
    cvtsi2sdq\t{%2, %0|%0, %2}
-   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+   vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,direct,*")
@@ -3888,28 +3932,28 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "DF")])
 
-(define_insn "avx512f_vcvtss2usi"
+(define_insn "avx512f_vcvtss2usi<round_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+	     (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtss2usi\t{%1, %0|%0, %1}"
+  "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "SI")])
 
-(define_insn "avx512f_vcvtss2usiq"
+(define_insn "avx512f_vcvtss2usiq<round_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI
 	  [(vec_select:SF
-	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+	     (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F && TARGET_64BIT"
-  "vcvtss2usi\t{%1, %0|%0, %1}"
+  "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "DI")])
@@ -3938,28 +3982,28 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "DI")])
 
-(define_insn "avx512f_vcvtsd2usi"
+(define_insn "avx512f_vcvtsd2usi<round_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	     (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtsd2usi\t{%1, %0|%0, %1}"
+  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "SI")])
 
-(define_insn "avx512f_vcvtsd2usiq"
+(define_insn "avx512f_vcvtsd2usiq<round_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI
 	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	     (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F && TARGET_64BIT"
-  "vcvtsd2usi\t{%1, %0|%0, %1}"
+  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "DI")])
@@ -3988,15 +4032,15 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "DI")])
 
-(define_insn "sse2_cvtsd2si"
+(define_insn "sse2_cvtsd2si<round_name>"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(unspec:SI
 	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+	     (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2"
-  "%vcvtsd2si\t{%1, %0|%0, %q1}"
+  "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "bdver1_decode" "double,double")
@@ -4019,15 +4063,15 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
-(define_insn "sse2_cvtsd2siq"
+(define_insn "sse2_cvtsd2siq<round_name>"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(unspec:DI
 	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+	     (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
 	     (parallel [(const_int 0)]))]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2 && TARGET_64BIT"
-  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
+  "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "bdver1_decode" "double,double")
@@ -4147,13 +4191,13 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
-(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>"
+(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
   [(set (match_operand:V8SI 0 "register_operand" "=v")
 	(unspec:V8SI
-	  [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "OI")])
@@ -4221,13 +4265,13 @@
    (set_attr "athlon_decode" "vector")
    (set_attr "bdver1_decode" "double")])
 
-(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>"
+(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
   [(set (match_operand:V8SI 0 "register_operand" "=v")
 	(unspec:V8SI
-	  [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
   "TARGET_AVX512F"
-  "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "OI")])
@@ -4298,19 +4342,19 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_cvtsd2ss<mask_scalar_name>"
+(define_insn "sse2_cvtsd2ss<mask_scalar_name><round_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
 	    (float_truncate:V2SF
-	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
+	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
 	  (match_operand:V4SF 1 "register_operand" "0,0,v")
 	  (const_int 1)))]
   "TARGET_SSE2"
   "@
    cvtsd2ss\t{%2, %0|%0, %2}
    cvtsd2ss\t{%2, %0|%0, %q2}
-   vcvtsd2ss\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2}"
+   vcvtsd2ss\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2<round_mask_scalar_op3>}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "athlon_decode" "vector,double,*")
@@ -4343,12 +4387,12 @@
    (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
    (set_attr "mode" "DF")])
 
-(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
+(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
   [(set (match_operand:V8SF 0 "register_operand" "=v")
 	(float_truncate:V8SF
-	  (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
+	  (match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")))]
   "TARGET_AVX512F"
-  "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "V8SF")])
@@ -6695,28 +6739,28 @@
   operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
 })
 
-(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name>"
+(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name><round_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
 	    [(match_operand:VF_128 1 "register_operand" "v")
-	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+	     (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")]
 	    UNSPEC_SCALEF)
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
+  "%vscalef<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_mask_scalar_op3>}"
   [(set_attr "prefix" "evex")
    (set_attr "mode"  "<ssescalarmode>")])
 
-(define_insn "avx512f_scalef<mode><mask_name>"
+(define_insn "avx512f_scalef<mode><mask_name><round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(unspec:VF_512
 	  [(match_operand:VF_512 1 "register_operand" "v")
-	   (match_operand:VF_512 2 "nonimmediate_operand" "vm")]
+	   (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_SCALEF))]
   "TARGET_AVX512F"
-  "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  "%vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
   [(set_attr "prefix" "evex")
    (set_attr "mode"  "<MODE>")])
 
@@ -8446,22 +8490,22 @@
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "<code><mode>3<mask_name>"
+(define_expand "<code><mode>3<mask_name><round_name>"
   [(set (match_operand:VI124_256_48_512 0 "register_operand")
 	(maxmin:VI124_256_48_512
 	  (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
 	  (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
-  "TARGET_AVX2 && <mask_mode512bit_condition>"
+  "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
-(define_insn "*avx2_<code><mode>3<mask_name>"
+(define_insn "*avx2_<code><mode>3<mask_name><round_name>"
   [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
 	(maxmin:VI124_256_48_512
 	  (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
-	  (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
+	  (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "<round_constraint>")))]
   "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
-   && <mask_mode512bit_condition>"
-  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+  "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
   [(set_attr "type" "sseiadd")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_evex")
@@ -12743,33 +12787,33 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_insn "avx512er_exp2<mode><mask_name>"
+(define_insn "avx512er_exp2<mode><mask_name><round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(unspec:VF_512
-	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_EXP2))]
   "TARGET_AVX512ER"
-  "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vexp2<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>"
+(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(unspec:VF_512
-	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_RCP28))]
   "TARGET_AVX512ER"
-  "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vrcp28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>"
+(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(unspec:VF_512
-	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_RSQRT28))]
   "TARGET_AVX512ER"
-  "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  "vrsqrt28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index f81741f..3e6e788 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -30,6 +30,16 @@
 (define_mode_iterator SUBST_S
   [QI HI SI DI])
 
+(define_mode_iterator SUBST_A
+  [V16QI
+   V16HI V8HI
+   V16SI V8SI  V4SI
+   V8DI  V4DI  V2DI
+   V16SF V8SF  V4SF
+   V8DF  V4DF  V2DF
+   QI HI SI DI SF DF
+   CCFP CCFPU])
+
 (define_subst_attr "mask_name" "mask" "" "_mask")
 (define_subst_attr "mask_applied" "mask" "false" "true")
 (define_subst_attr "mask_operand2" "mask" "" "%{%3%}%N2")
@@ -110,3 +120,34 @@
 	 (match_operand:SUBST_V 2 "const0_operand" "C")
 	 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))
 ])
+
+(define_subst_attr "round_name" "round" "" "_round")
+(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
+(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
+(define_subst_attr "round_op2" "round" "" "%R2")
+(define_subst_attr "round_op3" "round" "" "%R3")
+(define_subst_attr "round_op4" "round" "" "%R4")
+(define_subst_attr "round_op5" "round" "" "%R5")
+(define_subst_attr "round_op6" "round" "" "%R6")
+(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>")
+(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
+(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>")
+(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
+(define_subst_attr "round_constraint" "round" "vm" "v")
+(define_subst_attr "round_constraint2" "round" "m" "v")
+(define_subst_attr "round_constraint3" "round" "rm" "r")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)")
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
+(define_subst_attr "round_codefor" "round" "*" "")
+(define_subst_attr "round_opnum" "round" "5" "6")
+
+(define_subst "round"
+  [(set (match_operand:SUBST_A 0)
+        (match_operand:SUBST_A 1))]
+  "TARGET_AVX512F"
+  [(parallel[
+     (set (match_dup 0)
+          (match_dup 1))
+     (unspec [(match_operand:SI 2 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)])])



More information about the Gcc-patches mailing list