This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Support 3Dnow builtins


This is another patch that was mainly written by Graham Stott.  It
adds support for 3Dnow builtins to the i386 backend.

Notes:
 - There are two versions of 3Dnow: the original K6 version, and an
   extended version that adds a few new 3Dnow instructions and copies
   a few of the Pentium-III's SSE instructions (those that operate on
   MMX registers).  With -m3dnow, you get the extended version if you
   also use -march=athlon, and the original version otherwise.
 - Documentation for -mmmx/-msse/-m3dnow is missing.  I'm working on it;
   it should be ready tomorrow.

Bootstrapped on i686-linux.  I've done a very brief sanity check to verify
that it does indeed allow 3Dnow instructions to be used.


Bernd

        * c-common.c (type_for_mode): Add support for V2SFmode.
        * tree.c (build_common_tree_nodes_2): Likewise.
        * tree.h (enum tree_index, global_trees): Likewise.
        * config/i386/i386.c (x86_3dnow_a): New variable.
        (override_options): Support 3Dnow extensions.
        (bdesc_2arg, bdesc_1arg): Some SSE instructions are also part of
        Athlon's version of 3Dnow.
        (ix86_init_mmx_sse_builtins): Create 3Dnow builtins.
        (ix86_expand_builtin): Handle them.
        (safe_vector_operand): Also handle VALID_MMX_REG_MODE_3DNOW modes.
        (ix86_hard_regno_mode_ok): Add support for V2SFmode on MMX regs.
        * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, TARGET_3DNOW,
        TARGET_3DNOW_A): New macros.
        (TARGET_SWITCHES): Add 3Dnow switches.
        (VALID_MMX_REG_MODE_3DNOW): New macro.
        (VECTOR_MODE_SUPPORTED_P): Use it.
        (enum ix86_builtins): Add entries for 3Dnow builtins.
        * config/i386/i386.md (movv2sf_internal, movv2sf, pushv2sf, pf2id,
        pf2iw, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, eqv2sf3,
        pfmaxv2sf3, pfminv2sf3, mulv2sf3, femms, prefetch_3dnow, prefetchw,
        pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pavgusb, pfrcpv2sf2,
        pfrcpit1v2sf3, pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3,
        pmulhrwv4hi3, pswapdv2si2, pswapdv2sf2): New patterns.
        (mmx_pmovmskb, mmx_maskmovq, sse_movntdi, umulv4hi3_highpart,
        mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw,
        mmx_pshufw, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, sfence,
        sfence_insn, prefetch): Make these available if TARGET_SSE or
        TARGET_3DNOW_A.

Index: c-common.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/c-common.c,v
retrieving revision 1.255
diff -u -p -r1.255 c-common.c
--- c-common.c	2001/09/22 15:13:42	1.255
+++ c-common.c	2001/09/28 17:41:04
@@ -1347,6 +1347,8 @@ type_for_mode (mode, unsignedp)
     return V4HI_type_node;
   if (mode == TYPE_MODE (V8QI_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
     return V8QI_type_node;
+  if (mode == TYPE_MODE (V2SF_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
+    return V2SF_type_node;
 #endif

   return 0;
Index: tree.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree.c,v
retrieving revision 1.212
diff -u -p -r1.212 tree.c
--- tree.c	2001/09/24 22:58:27	1.212
+++ tree.c	2001/09/28 17:41:04
@@ -4877,4 +4877,9 @@ build_common_tree_nodes_2 (short_double)
   TREE_TYPE (V8QI_type_node) = intQI_type_node;
   TYPE_MODE (V8QI_type_node) = V8QImode;
   finish_vector_type (V8QI_type_node);
+
+  V2SF_type_node = make_node (VECTOR_TYPE);
+  TREE_TYPE (V2SF_type_node) = float_type_node;
+  TYPE_MODE (V2SF_type_node) = V2SFmode;
+  finish_vector_type (V2SF_type_node);
 }
Index: tree.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/tree.h,v
retrieving revision 1.268
diff -u -p -r1.268 tree.h
--- tree.h	2001/09/22 15:13:42	1.268
+++ tree.h	2001/09/28 17:41:05
@@ -1846,6 +1846,7 @@ enum tree_index
   TI_V8QI_TYPE,
   TI_V4HI_TYPE,
   TI_V2SI_TYPE,
+  TI_V2SF_TYPE,

   TI_MAIN_IDENTIFIER,

@@ -1911,6 +1912,7 @@ extern tree global_trees[TI_MAX];
 #define V8QI_type_node			global_trees[TI_V8QI_TYPE]
 #define V4HI_type_node			global_trees[TI_V4HI_TYPE]
 #define V2SI_type_node			global_trees[TI_V2SI_TYPE]
+#define V2SF_type_node			global_trees[TI_V2SF_TYPE]

 /* An enumeration of the standard C integer types.  These must be
    ordered so that shorter types appear before longer ones.  */
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.310
diff -u -p -r1.310 i386.c
--- config/i386/i386.c	2001/09/28 17:20:22	1.310
+++ config/i386/i386.c	2001/09/28 17:41:06
@@ -323,6 +323,7 @@ const int x86_double_with_add = ~m_386;
 const int x86_use_bit_test = m_386;
 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
+const int x86_3dnow_a = m_ATHLON;
 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
 const int x86_branch_hints = m_PENT4;
 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
@@ -988,6 +989,15 @@ override_options ()
   if (TARGET_SSE)
     target_flags |= MASK_MMX;

+  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
+  if (TARGET_3DNOW)
+    {
+      target_flags |= MASK_MMX;
+      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
+	 extensions it adds.  */
+      if (x86_3dnow_a & (1 << ix86_arch))
+	target_flags |= MASK_3DNOW_A;
+    }
   if ((x86_accumulate_outgoing_args & CPUMASK)
       && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
       && !optimize_size)
@@ -10731,15 +10741,15 @@ static struct builtin_description bdesc_

   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
-  { MASK_SSE, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

-  { MASK_SSE, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
-  { MASK_SSE, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
@@ -10748,10 +10758,10 @@ static struct builtin_description bdesc_
   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

-  { MASK_SSE, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
-  { MASK_SSE, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
-  { MASK_SSE, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
-  { MASK_SSE, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
@@ -10794,7 +10804,7 @@ static struct builtin_description bdesc_

 static struct builtin_description bdesc_1arg[] =
 {
-  { MASK_SSE, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
@@ -11034,6 +11044,40 @@ ix86_init_mmx_sse_builtins ()
 						 long_long_unsigned_type_node,
 						 endlink)));

+  tree v2si_ftype_v2sf
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2si
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SI_type_node,
+                                      endlink));
+  tree v2si_ftype_v2si
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SI_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2sf
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2sf_v2sf
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      tree_cons (NULL_TREE,
+                                                 V2SF_type_node,
+                                                 endlink)));
+  tree v2si_ftype_v2sf_v2sf
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      tree_cons (NULL_TREE,
+                                                 V2SF_type_node,
+                                                 endlink)));
+
+  tree void_ftype_pchar
+    = build_function_type (void_type_node,
+                           tree_cons (NULL_TREE, pchar_type_node,
+                                      endlink));
+
   /* Add all builtins that are more or less simple operations on two
      operands.  */
   for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
@@ -11047,9 +11091,6 @@ ix86_init_mmx_sse_builtins ()
 	continue;
       mode = insn_data[d->icode].operand[1].mode;

-      if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
-	continue;
-
       switch (mode)
 	{
 	case V4SFmode:
@@ -11121,10 +11162,10 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

-  def_builtin (MASK_SSE, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
-  def_builtin (MASK_SSE, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

-  def_builtin (MASK_SSE, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
@@ -11139,14 +11180,14 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE, "__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
-  def_builtin (MASK_SSE, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
-  def_builtin (MASK_SSE, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

-  def_builtin (MASK_SSE, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-  def_builtin (MASK_SSE, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

-  def_builtin (MASK_SSE, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
@@ -11157,6 +11198,38 @@ ix86_init_mmx_sse_builtins ()

   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

+  /* Original 3DNow!  */
+  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
+
+  /* 3DNow! extension as used in the Athlon CPU.  */
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+
   /* Composite intrinsics.  */
   def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
   def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
@@ -11179,7 +11252,7 @@ safe_vector_operand (x, mode)
     return x;
   x = gen_reg_rtx (mode);

-  if (VALID_MMX_REG_MODE (mode))
+  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
     emit_insn (gen_mmx_clrdi (mode == DImode ? x
 			      : gen_rtx_SUBREG (DImode, x, 0)));
   else
@@ -11739,6 +11812,107 @@ ix86_expand_builtin (exp, target, subtar
       emit_insn (pat);
       return target;

+    case IX86_BUILTIN_FEMMS:
+      emit_insn (gen_femms ());
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PAVGUSB:
+      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
+
+    case IX86_BUILTIN_PF2ID:
+      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
+
+    case IX86_BUILTIN_PFACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
+
+    case IX86_BUILTIN_PFADD:
+     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPEQ:
+      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPGE:
+      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPGT:
+      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMAX:
+      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMIN:
+      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMUL:
+      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRCP:
+      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
+
+    case IX86_BUILTIN_PFRCPIT1:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRCPIT2:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRSQIT1:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRSQRT:
+      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
+
+    case IX86_BUILTIN_PFSUB:
+      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFSUBR:
+      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PI2FD:
+      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
+
+    case IX86_BUILTIN_PMULHRW:
+      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
+
+    case IX86_BUILTIN_PREFETCH_3DNOW:
+      icode = CODE_FOR_prefetch_3dnow;
+      arg0 = TREE_VALUE (arglist);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PREFETCHW:
+      icode = CODE_FOR_prefetchw;
+      arg0 = TREE_VALUE (arglist);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PF2IW:
+      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
+
+    case IX86_BUILTIN_PFNACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
+
+    case IX86_BUILTIN_PFPNACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
+
+    case IX86_BUILTIN_PI2FW:
+      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
+
+    case IX86_BUILTIN_PSWAPDSI:
+      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
+
+    case IX86_BUILTIN_PSWAPDSF:
+      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
+
       /* Composite intrinsics.  */
     case IX86_BUILTIN_SETPS1:
       target = assign_386_stack_local (SFmode, 0);
@@ -12055,7 +12229,7 @@ ix86_hard_regno_mode_ok (regno, mode)
   if (SSE_REGNO_P (regno))
     return VALID_SSE_REG_MODE (mode);
   if (MMX_REGNO_P (regno))
-    return VALID_MMX_REG_MODE (mode);
+    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
   /* We handle both integer and floats in the general purpose registers.
      In future we should be able to handle vector modes as well.  */
   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.205
diff -u -p -r1.205 i386.h
--- config/i386/i386.h	2001/09/28 17:20:23	1.205
+++ config/i386/i386.h	2001/09/28 17:41:06
@@ -118,10 +118,12 @@ extern int target_flags;
 #define MASK_MMX		0x00020000	/* Support MMX regs/builtins */
 #define MASK_SSE		0x00040000	/* Support SSE regs/builtins */
 #define MASK_SSE2		0x00080000	/* Support SSE2 regs/builtins */
-#define MASK_128BIT_LONG_DOUBLE 0x00100000	/* long double size is 128bit */
-#define MASK_MIX_SSE_I387	0x00200000	/* Mix SSE and i387 instructions */
-#define MASK_64BIT		0x00400000	/* Produce 64bit code */
-#define MASK_NO_RED_ZONE	0x00800000	/* Do not use red zone */
+#define MASK_3DNOW		0x00100000	/* Support 3Dnow builtins */
+#define MASK_3DNOW_A		0x00200000	/* Support Athlon 3Dnow builtins */
+#define MASK_128BIT_LONG_DOUBLE 0x00400000	/* long double size is 128bit */
+#define MASK_MIX_SSE_I387	0x00800000	/* Mix SSE and i387 instructions */
+#define MASK_64BIT		0x01000000	/* Produce 64bit code */
+#define MASK_NO_RED_ZONE	0x02000000	/* Do not use red zone */

 /* Temporary codegen switches */
 #define MASK_INTEL_SYNTAX	0x00000200
@@ -264,6 +266,8 @@ extern const int x86_epilogue_using_move
 #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
 #define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0)
 #define TARGET_MMX ((target_flags & MASK_MMX) != 0)
+#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0)
+#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0)

 #define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE))

@@ -335,6 +339,10 @@ extern const int x86_epilogue_using_move
   { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
   { "no-mmx",			-MASK_MMX,				      \
     N_("Do not support MMX builtins") },				      \
+  { "3dnow",                     MASK_3DNOW,				      \
+    N_("Support 3DNow! builtins") },					      \
+  { "no-3dnow",                 -MASK_3DNOW,				      \
+    N_("Do not support 3DNow! builtins") },				      \
   { "sse",			 MASK_SSE,				      \
     N_("Support MMX and SSE builtins and code generation") },		      \
   { "no-sse",			-MASK_SSE,				      \
@@ -918,13 +926,17 @@ extern int ix86_arch;
      || (MODE) == SFmode \
      || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE))))

+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+    ((MODE) == V2SFmode || (MODE) == SFmode)
+
 #define VALID_MMX_REG_MODE(MODE) \
     ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
      || (MODE) == V2SImode || (MODE) == SImode)

 #define VECTOR_MODE_SUPPORTED_P(MODE)					\
     (VALID_SSE_REG_MODE (MODE) && TARGET_SSE ? 1			\
-     : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 : 0)
+     : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1			\
+     : VALID_MMX_REG_MODE_3DNOW (MODE) && TARGET_3DNOW ? 1 : 0)

 #define VALID_FP_MODE_P(mode) \
     ((mode) == SFmode || (mode) == DFmode || (mode) == TFmode	\
@@ -2203,6 +2215,38 @@ enum ix86_builtins
   IX86_BUILTIN_STMXCSR,
   IX86_BUILTIN_SFENCE,
   IX86_BUILTIN_PREFETCH,
+
+  /* 3DNow! Original */
+  IX86_BUILTIN_FEMMS,
+  IX86_BUILTIN_PAVGUSB,
+  IX86_BUILTIN_PF2ID,
+  IX86_BUILTIN_PFACC,
+  IX86_BUILTIN_PFADD,
+  IX86_BUILTIN_PFCMPEQ,
+  IX86_BUILTIN_PFCMPGE,
+  IX86_BUILTIN_PFCMPGT,
+  IX86_BUILTIN_PFMAX,
+  IX86_BUILTIN_PFMIN,
+  IX86_BUILTIN_PFMUL,
+  IX86_BUILTIN_PFRCP,
+  IX86_BUILTIN_PFRCPIT1,
+  IX86_BUILTIN_PFRCPIT2,
+  IX86_BUILTIN_PFRSQIT1,
+  IX86_BUILTIN_PFRSQRT,
+  IX86_BUILTIN_PFSUB,
+  IX86_BUILTIN_PFSUBR,
+  IX86_BUILTIN_PI2FD,
+  IX86_BUILTIN_PMULHRW,
+  IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
+  IX86_BUILTIN_PREFETCHW,
+
+  /* 3DNow! Athlon Extensions */
+  IX86_BUILTIN_PF2IW,
+  IX86_BUILTIN_PFNACC,
+  IX86_BUILTIN_PFPNACC,
+  IX86_BUILTIN_PI2FW,
+  IX86_BUILTIN_PSWAPDSI,
+  IX86_BUILTIN_PSWAPDSF,

   /* Composite builtins, expand to more than one insn.  */
   IX86_BUILTIN_SETPS1,
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.296
diff -u -p -r1.296 i386.md
--- config/i386/i386.md	2001/09/28 17:20:23	1.296
+++ config/i386/i386.md	2001/09/28 17:41:07
@@ -92,6 +92,15 @@
 ;; 43 This is a `rsqsrt' operation.
 ;; 44 This is a `sfence' operation.
 ;; 45 This is a noop to prevent excessive combiner cleverness.
+;; 46 This is a `femms' operation.
+;; 47 This is a `prefetch' (3DNow) operation.
+;; 48 This is a `prefetchw' operation.
+;; 49 This is a `pavgusb' operation.
+;; 50 This is a `pfrcp' operation.
+;; 51 This is a `pfrcpit1' operation.
+;; 52 This is a `pfrcpit2' operation.
+;; 53 This is a `pfrsqrt' operation.
+;; 54 This is a `pfrsqit1' operation.

 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; from i386.c.
@@ -17455,6 +17464,13 @@
   "movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])

+(define_insn "movv2sf_internal"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
+        (match_operand:V2SF 1 "general_operand" "ym,y"))]
+  "TARGET_3DNOW"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
 (define_expand "movti"
   [(set (match_operand:TI 0 "general_operand" "")
 	(match_operand:TI 1 "general_operand" ""))]
@@ -17641,6 +17657,40 @@
     }
 })

+(define_expand "movv2sf"
+  [(set (match_operand:V2SF 0 "general_operand" "")
+	(match_operand:V2SF 1 "general_operand" ""))]
+   "TARGET_3DNOW"
+   "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V2SFmode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr,
+		      XEXP (force_const_mem (V2SFmode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V2SFmode, addr);
+   }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V2SFmode)
+      && !register_operand (operands[1], V2SFmode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V2SFmode))
+   {
+      rtx temp = force_reg (V2SFmode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
 (define_insn_and_split "*pushti"
   [(set (match_operand:TI 0 "push_operand" "=<")
 	(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -17707,6 +17757,17 @@
   ""
   [(set_attr "type" "mmx")])

+(define_insn_and_split "*pushv2sf"
+  [(set (match_operand:V2SF 0 "push_operand" "=<")
+	(match_operand:V2SF 1 "nonmemory_operand" "y"))]
+  "TARGET_3DNOW"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V2SF (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "mmx")])
+
 (define_insn "movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
 	(match_operand:TI 1 "general_operand" "xm,x"))]
@@ -17749,7 +17810,7 @@
 (define_insn "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmovmskb\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])

@@ -17757,7 +17818,7 @@
   [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
 	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
 		      (match_operand:V8QI 2 "register_operand" "y")] 32))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   ;; @@@ check ordering of operands in intel/nonintel syntax
   "maskmovq\t{%2, %1|%1, %2}"
   [(set_attr "type" "sse")])
@@ -17772,7 +17833,7 @@
 (define_insn "sse_movntdi"
   [(set (match_operand:DI 0 "memory_operand" "=m")
 	(unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "movntq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])

@@ -18535,7 +18596,7 @@
 	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
 		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
 	  (const_int 16))))]
-  "TARGET_MMX"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmulhuw\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmx")])

@@ -18628,7 +18689,7 @@
 					       (const_int 1)
 					       (const_int 1)])))
 	 (const_int 1)))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pavgb\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18643,7 +18704,7 @@
 					       (const_int 1)
 					       (const_int 1)])))
 	 (const_int 1)))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pavgw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18651,7 +18712,7 @@
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
 			      (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "psadbw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18664,7 +18725,7 @@
 			(vec_duplicate:V4HI
 			 (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
 			(match_operand:SI 3 "immediate_operand" "i")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pinsrw\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "sse")])

@@ -18673,7 +18734,7 @@
         (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
 				       (parallel
 					[(match_operand:SI 2 "immediate_operand" "i")]))))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pextrw\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sse")])

@@ -18682,7 +18743,7 @@
         (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
 		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
 		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pshufw\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "sse")])

@@ -18744,7 +18805,7 @@
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
 		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmaxub\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18752,7 +18813,7 @@
   [(set (match_operand:V4HI 0 "register_operand" "=y")
         (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
 		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmaxsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18760,7 +18821,7 @@
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
 		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pminub\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -18768,7 +18829,7 @@
   [(set (match_operand:V4HI 0 "register_operand" "=y")
         (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
 		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pminsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])

@@ -19031,7 +19092,7 @@
 (define_expand "sfence"
   [(set (match_dup 0)
 	(unspec:BLK [(match_dup 0)] 44))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
 {
   operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
   MEM_VOLATILE_P (operands[0]) = 1;
@@ -19040,7 +19101,7 @@
 (define_insn "*sfence_insn"
   [(set (match_operand:BLK 0 "" "")
 	(unspec:BLK [(match_dup 0)] 44))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "sfence"
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
@@ -19048,7 +19109,7 @@
 (define_insn "prefetch"
   [(unspec [(match_operand:SI 0 "address_operand" "p")
 	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
 {
   switch (INTVAL (operands[1]))
     {
@@ -19126,3 +19187,289 @@
    (set_attr "memory" "store")
    (set_attr "modrm" "0")
    (set_attr "mode" "DI")])
+
+;; 3Dnow! instructions
+
+(define_insn "addv2sf3"  ; V2SF addition (plus:V2SF); emits pfadd.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(plus:V2SF (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfadd\\t{%2, %0|%0, %2}"  ; two operand orders, selected by assembler dialect
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv2sf3"  ; V2SF subtraction, operand 1 minus operand 2; emits pfsub.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		    (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfsub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subrv2sf3"  ; Reversed V2SF subtraction, operand 2 minus operand 1; emits pfsubr.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")  ; minuend: register or memory
+                    (match_operand:V2SF 1 "register_operand" "0")))]  ; subtrahend: tied to operand 0
+  "TARGET_3DNOW"
+  "pfsubr\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv2sf3"  ; Greater-than compare of two V2SF operands, V2SI result; emits pfcmpgt.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(gt:V2SI (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+ "TARGET_3DNOW"
+  "pfcmpgt\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gev2sf3"  ; Greater-or-equal compare of two V2SF operands, V2SI result; emits pfcmpge.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(ge:V2SI (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfcmpge\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv2sf3"  ; Equality compare of two V2SF operands, V2SI result; emits pfcmpeq.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(eq:V2SI (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfcmpeq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfmaxv2sf3"  ; V2SF maximum (smax:V2SF); emits pfmax.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfmax\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfminv2sf3"  ; V2SF minimum (smin:V2SF); emits pfmin.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfmin\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mulv2sf3"  ; V2SF multiplication (mult:V2SF); emits pfmul.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(mult:V2SF (match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfmul\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "femms"  ; Emits femms; modelled as unspec_volatile 46 plus explicit clobbers.
+  [(unspec_volatile [(const_int 0)] 46)
+   (clobber (reg:XF 8))  ; hard regs 8-15 in XFmode -- presumably the x87 stack; confirm in i386.h
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 29))  ; hard regs 29-36 in DImode -- presumably the MMX registers; confirm in i386.h
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))
+   (clobber (reg:DI 35))
+   (clobber (reg:DI 36))]
+  "TARGET_3DNOW"
+  "femms"
+  [(set_attr "type" "mmx")])
+
+(define_insn "prefetch_3dnow"  ; Emits "prefetch" on the address in operand 0 (unspec 47).
+  [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)]
+  "TARGET_3DNOW"
+  "prefetch\\t%a0"  ; %a0 prints operand 0 as a memory address
+  [(set_attr "type" "mmx")])
+
+(define_insn "prefetchw"  ; Emits "prefetchw" on the address in operand 0 (unspec 48).
+  [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)]
+  "TARGET_3DNOW"
+  "prefetchw\\t%a0"  ; %a0 prints operand 0 as a memory address
+  [(set_attr "type" "mmx")])
+
+(define_insn "pf2id"  ; V2SF to V2SI conversion (fix:V2SI); emits pf2id.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pf2id\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pf2iw"  ; V2SF to integer, saturated to 16 bits and sign-extended to V2SI; emits pf2iw.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(sign_extend:V2SI  ; widen the saturated halfwords back to V2SI
+	   (ss_truncate:V2HI  ; signed-saturating narrow to V2HI
+	      (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set, not just base 3DNow!
+  "pf2iw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfacc"  ; Horizontal add: result = { op1[0] + op1[1], op2[0] + op2[1] }; emits pfacc.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(vec_concat:V2SF
+	   (plus:SF
+	      (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+			     (parallel [(const_int  0)]))
+	      (vec_select:SF (match_dup 1)
+			     (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")  ; "m" added: agree with the predicate, avoid a reload
+			     (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+			     (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW"
+  "pfacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfnacc"  ; Horizontal subtract: result = { op1[0] - op1[1], op2[0] - op2[1] }; emits pfnacc.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  	(vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+			     (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+			     (parallel [(const_int 1)])))
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")  ; "m" added: agree with the predicate, avoid a reload
+			     (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+			     (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set
+  "pfnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfpnacc"  ; Mixed: result = { op1[0] - op1[1], op2[0] + op2[1] }; emits pfpnacc.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+			     (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+			     (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")  ; "m" added: agree with the predicate, avoid a reload
+			     (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 2)
+			     (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set
+  "pfpnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pi2fw"  ; Converts the low 16 bits of each V2SI element, sign-extended, to float; emits pi2fw.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(float:V2SF
+	   (vec_concat:V2SI
+	      (sign_extend:SI
+		 (truncate:HI  ; keep only the low halfword of element 0
+		    (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+				   (parallel [(const_int 0)]))))
+              (sign_extend:SI
+		 (truncate:HI  ; same for element 1
+                    (vec_select:SI (match_dup 1)
+				   (parallel [(const_int  1)])))))))]
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set
+  "pi2fw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "floatv2si2"  ; V2SI to V2SF conversion (float:V2SF); emits pi2fd.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pi2fd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; This insn is identical to pavgb in operation, but the opcode is
+;; different.  To avoid accidentally matching pavgb, use an unspec.
+
+(define_insn "pavgusb"  ; Emits pavgusb on two V8QI operands; kept opaque as unspec 49.
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (unspec:V8QI
+          [(match_operand:V8QI 1 "register_operand" "0")  ; "0": tied to operand 0's register
+           (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pavgusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; 3DNow reciprocal and sqrt
+
+(define_insn "pfrcpv2sf2"  ; Emits pfrcp on one V2SF operand; kept opaque as unspec 50.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfrcp\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfrcpit1v2sf3"  ; Emits pfrcpit1 on two V2SF operands; kept opaque as unspec 51.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfrcpit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfrcpit2v2sf3"  ; Emits pfrcpit2 on two V2SF operands; kept opaque as unspec 52.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfrcpit2\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pfrsqrtv2sf2"  ; Emits pfrsqrt on one V2SF operand; kept opaque as unspec 53.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))]  ; register or memory source
+  "TARGET_3DNOW"
+   "pfrsqrt\\t{%1, %0|%0, %1}"
+   [(set_attr "type" "mmx")])
+
+(define_insn "pfrsqit1v2sf3"  ; Emits pfrsqit1 on two V2SF operands; kept opaque as unspec 54.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")  ; "0": tied to operand 0's register
+		      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))]  ; register or memory source
+  "TARGET_3DNOW"
+  "pfrsqit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pmulhrwv4hi3"  ; Rounded high-half multiply: trunc ((sext (op1) * sext (op2) + 32768) >> 16); emits pmulhrw.
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(truncate:V4HI
+	   (lshiftrt:V4SI
+	      (plus:V4SI
+	         (mult:V4SI
+	            (sign_extend:V4SI
+		       (match_operand:V4HI 1 "register_operand" "0"))
+	            (sign_extend:V4SI
+		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	         (vec_const:V4SI  ; rounding term: second operand of the plus
+	            (parallel [(const_int 32768)  ; 0x8000 in decimal; .md const_int values are read as decimal
+			       (const_int 32768)
+			       (const_int 32768)
+			       (const_int 32768)])))
+	      (const_int 16))))]  ; shift count of the lshiftrt: keep the high halfword
+  "TARGET_3DNOW"
+  "pmulhrw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pswapdv2si2"  ; Swaps the two elements of a V2SI vector; emits pswapd.
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+			 (parallel [(const_int 1) (const_int 0)])))]  ; result = { op1[1], op1[0] }
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "pswapdv2sf2"  ; Swaps the two elements of a V2SF vector; emits pswapd.
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+	(vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+			 (parallel [(const_int 1) (const_int 0)])))]  ; result = { op1[1], op1[0] }
+  "TARGET_3DNOW_A"  ; needs the extended (Athlon) 3DNow! set
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]