PATCH: x86/x86-64: Support Intel MNI intrinsics
H. J. Lu
hjl@lucon.org
Fri Feb 24 20:47:00 GMT 2006
These 2 patches implement Intel MNI intrinsics and fix the 64bit SSE3
intrinsics. The corresponding assembler change is at
http://sourceware.org/ml/binutils/2006-02/msg00322.html
H.J.
----
gcc/
2005-11-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/24879
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Add
void_ftype_di_di and void_ftype_pcvoid_di_di. Use void_ftype_di_di
on __builtin_ia32_mwait and void_ftype_pcvoid_di_di on
__builtin_ia32_monitor for 64bit.
(ix86_expand_builtin): Support 64bit monitor and mwait.
* config/i386/pmmintrin.h (_mm_monitor): Remove macro. Use
inline function.
(_mm_mwait): Likewise.
* config/i386/sse.md (sse3_mwait): Make it 32bit only.
(sse3_mwait64): New. 64bit mwait.
(sse3_monitor): Make it 32bit only.
(sse3_monitor64): New. 64bit monitor.
gcc/testsuite/
2005-11-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/24879
* gcc.target/i386/monitor.c: New file.
2006-02-24 H.J. Lu <hongjiu.lu@intel.com>
* config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers.
(x86_64-*-*): Likewise.
* config/i386/i386.c (pta_flags): Add PTA_MNI.
(override_options): Check MNI.
(ix86_builtins): Add IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD,
IX86_BUILTIN_PHADDSW, IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD,
IX86_BUILTIN_PHSUBSW, IX86_BUILTIN_PMADDUBSW,
IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
IX86_BUILTIN_PABSD, IX86_BUILTIN_PHADDW128,
IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128,
IX86_BUILTIN_PHSUBSW128, IX86_BUILTIN_PMADDUBSW128,
IX86_BUILTIN_PMULHRSW128, IX86_BUILTIN_PSHUFB128,
IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128,
IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128 and
IX86_BUILTIN_PABSD128.
(bdesc_2arg): Add MNI.
(bdesc_1arg): Likewise.
(ix86_init_mmx_sse_builtins): Support MNI.
(ix86_expand_builtin): Likewise.
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Likewise.
* config/i386/i386.md (UNSPEC_LDQQU): Renamed to ...
(UNSPEC_LDDQU): This.
(UNSPEC_PSHUFB): New.
(UNSPEC_PSIGN): Likewise.
(UNSPEC_PALIGNR): Likewise.
Include mmx.md before sse.md.
* config/i386/i386.opt: Add -mmni.
* config/i386/sse.md (sse3_lddqu): Updated.
(mni_phaddwv8hi3): New pattern for MNI.
(mni_phaddwv4hi3): Likewise.
(mni_phadddv4si3): Likewise.
(mni_phadddv2si3): Likewise.
(mni_phaddswv8hi3): Likewise.
(mni_phaddswv4hi3): Likewise.
(mni_phsubwv8hi3): Likewise.
(mni_phsubwv4hi3): Likewise.
(mni_phsubdv4si3): Likewise.
(mni_phsubdv2si3): Likewise.
(mni_phsubswv8hi3): Likewise.
(mni_phsubswv4hi3): Likewise.
(mni_pmaddubswv8hi3): Likewise.
(mni_pmaddubswv4hi3): Likewise.
(mni_pmulhrswv8hi3): Likewise.
(mni_pmulhrswv4hi3): Likewise.
(mni_pshufbv16qi3): Likewise.
(mni_pshufbv8qi3): Likewise.
(mni_psign<mode>3): Likewise.
(mni_psign<mode>3): Likewise.
(mni_palignrti): Likewise.
(mni_palignrdi): Likewise.
(mni_pabs<mode>2): Likewise.
(mni_pabs<mode>2): Likewise.
* config/i386/tmmintrin.h: New file.
* doc/extend.texi: Document MNI built-in functions.
* doc/invoke.texi: Document -mmni/-mno-mni switches.
-------------- next part --------------
gcc/
2005-11-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/24879
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Add
void_ftype_di_di and void_ftype_pcvoid_di_di. Use void_ftype_di_di
on __builtin_ia32_mwait and void_ftype_pcvoid_di_di on
__builtin_ia32_monitor for 64bit.
(ix86_expand_builtin): Support 64bit monitor and mwait.
* config/i386/pmmintrin.h (_mm_monitor): Remove macro. Use
inline function.
(_mm_mwait): Likewise.
* config/i386/sse.md (sse3_mwait): Make it 32bit only.
(sse3_mwait64): New. 64bit mwait.
(sse3_monitor): Make it 32bit only.
(sse3_monitor64): New. 64bit monitor.
gcc/testsuite/
2005-11-17 H.J. Lu <hongjiu.lu@intel.com>
PR target/24879
* gcc.target/i386/monitor.c: New file.
--- gcc/config/i386/i386.c.sse3 2006-01-31 09:22:13.000000000 -0800
+++ gcc/config/i386/i386.c 2006-01-31 09:27:45.000000000 -0800
@@ -14766,10 +14766,20 @@ ix86_init_mmx_sse_builtins (void)
tree void_ftype_unsigned_unsigned
= build_function_type_list (void_type_node, unsigned_type_node,
unsigned_type_node, NULL_TREE);
+ tree void_ftype_di_di
+ = build_function_type_list (void_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
tree void_ftype_pcvoid_unsigned_unsigned
= build_function_type_list (void_type_node, const_ptr_type_node,
unsigned_type_node, unsigned_type_node,
NULL_TREE);
+ tree void_ftype_pcvoid_di_di
+ = build_function_type_list (void_type_node, const_ptr_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
tree unsigned_ftype_void
= build_function_type (unsigned_type_node, void_list_node);
tree v2si_ftype_v4sf
@@ -15227,12 +15237,24 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
/* Prescott New Instructions. */
- def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
- void_ftype_pcvoid_unsigned_unsigned,
- IX86_BUILTIN_MONITOR);
- def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
- void_ftype_unsigned_unsigned,
- IX86_BUILTIN_MWAIT);
+ if (TARGET_64BIT)
+ {
+ def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
+ void_ftype_pcvoid_di_di,
+ IX86_BUILTIN_MONITOR);
+ def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
+ void_ftype_di_di,
+ IX86_BUILTIN_MWAIT);
+ }
+ else
+ {
+ def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
+ void_ftype_pcvoid_unsigned_unsigned,
+ IX86_BUILTIN_MONITOR);
+ def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
+ void_ftype_unsigned_unsigned,
+ IX86_BUILTIN_MWAIT);
+ }
def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
v4sf_ftype_v4sf,
IX86_BUILTIN_MOVSHDUP);
@@ -16140,13 +16162,17 @@ ix86_expand_builtin (tree exp, rtx targe
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
+ mode = TARGET_64BIT ? DImode : SImode;
if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
+ op0 = copy_to_mode_reg (mode, op0);
if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
+ op1 = copy_to_mode_reg (mode, op1);
if (!REG_P (op2))
- op2 = copy_to_mode_reg (SImode, op2);
- emit_insn (gen_sse3_monitor (op0, op1, op2));
+ op2 = copy_to_mode_reg (mode, op2);
+ if (TARGET_64BIT)
+ emit_insn (gen_sse3_monitor64 (op0, op1, op2));
+ else
+ emit_insn (gen_sse3_monitor (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
@@ -16154,11 +16180,15 @@ ix86_expand_builtin (tree exp, rtx targe
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
+ mode = TARGET_64BIT ? DImode : SImode;
if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
+ op0 = copy_to_mode_reg (mode, op0);
if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- emit_insn (gen_sse3_mwait (op0, op1));
+ op1 = copy_to_mode_reg (mode, op1);
+ if (TARGET_64BIT)
+ emit_insn (gen_sse3_mwait64 (op0, op1));
+ else
+ emit_insn (gen_sse3_mwait (op0, op1));
return 0;
case IX86_BUILTIN_LDDQU:
--- gcc/config/i386/pmmintrin.h.sse3 2005-11-04 14:13:48.000000000 -0800
+++ gcc/config/i386/pmmintrin.h 2006-01-31 09:22:13.000000000 -0800
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
This file is part of GCC.
@@ -25,7 +25,7 @@
Public License. */
/* Implemented from the specification included in the Intel C++ Compiler
- User Guide and Reference, version 8.0. */
+ User Guide and Reference, version 9.0. */
#ifndef _PMMINTRIN_H_INCLUDED
#define _PMMINTRIN_H_INCLUDED
@@ -110,7 +110,6 @@ _mm_lddqu_si128 (__m128i const *__P)
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}
-#if 0
static __inline void __attribute__((__always_inline__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
@@ -122,10 +121,6 @@ _mm_mwait (unsigned int __E, unsigned in
{
__builtin_ia32_mwait (__E, __H);
}
-#else
-#define _mm_monitor(P, E, H) __builtin_ia32_monitor ((P), (E), (H))
-#define _mm_mwait(E, H) __builtin_ia32_mwait ((E), (H))
-#endif
#endif /* __SSE3__ */
--- gcc/config/i386/sse.md.sse3 2006-01-19 09:18:36.000000000 -0800
+++ gcc/config/i386/sse.md 2006-01-31 09:22:13.000000000 -0800
@@ -3941,15 +3941,36 @@
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
(match_operand:SI 1 "register_operand" "c")]
UNSPECV_MWAIT)]
- "TARGET_SSE3"
+ "TARGET_SSE3 && !TARGET_64BIT"
"mwait\t%0, %1"
[(set_attr "length" "3")])
+(define_insn "sse3_mwait64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:DI 1 "register_operand" "c")]
+ UNSPECV_MWAIT)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; Older assembler doesn't support "mwait %rax,%rcx".
+;; "mwait\t%0, %1"
+ "mwait"
+ [(set_attr "length" "3")])
+
(define_insn "sse3_monitor"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
(match_operand:SI 1 "register_operand" "c")
(match_operand:SI 2 "register_operand" "d")]
UNSPECV_MONITOR)]
- "TARGET_SSE3"
+ "TARGET_SSE3 && !TARGET_64BIT"
"monitor\t%0, %1, %2"
[(set_attr "length" "3")])
+
+(define_insn "sse3_monitor64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:DI 1 "register_operand" "c")
+ (match_operand:DI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; Older assembler doesn't support "monitor %rax,%rcx,%rdx".
+;; "monitor\t%0, %1, %2"
+ "monitor"
+ [(set_attr "length" "3")])
--- gcc/testsuite/gcc.target/i386/monitor.c.sse3 2006-01-31 09:22:13.000000000 -0800
+++ gcc/testsuite/gcc.target/i386/monitor.c 2006-01-31 09:22:13.000000000 -0800
@@ -0,0 +1,27 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse3" } */
+
+/* Verify that they work in both 32bit and 64bit. */
+
+#include <pmmintrin.h>
+
+void
+foo (char *p, int x, int y, int z)
+{
+ _mm_monitor (p, y, x);
+ _mm_mwait (z, y);
+}
+
+void
+bar (char *p, long x, long y, long z)
+{
+ _mm_monitor (p, y, x);
+ _mm_mwait (z, y);
+}
+
+void
+foo1 (char *p)
+{
+ _mm_monitor (p, 0, 0);
+ _mm_mwait (0, 0);
+}
-------------- next part --------------
2006-02-24 H.J. Lu <hongjiu.lu@intel.com>
* config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers.
(x86_64-*-*): Likewise.
* config/i386/i386.c (pta_flags): Add PTA_MNI.
(override_options): Check MNI.
(ix86_builtins): Add IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD,
IX86_BUILTIN_PHADDSW, IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD,
IX86_BUILTIN_PHSUBSW, IX86_BUILTIN_PMADDUBSW,
IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
IX86_BUILTIN_PABSD, IX86_BUILTIN_PHADDW128,
IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128,
IX86_BUILTIN_PHSUBSW128, IX86_BUILTIN_PMADDUBSW128,
IX86_BUILTIN_PMULHRSW128, IX86_BUILTIN_PSHUFB128,
IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128,
IX86_BUILTIN_PABSB128, IX86_BUILTIN_PABSW128 and
IX86_BUILTIN_PABSD128.
(bdesc_2arg): Add MNI.
(bdesc_1arg): Likewise.
(ix86_init_mmx_sse_builtins): Support MNI.
(ix86_expand_builtin): Likewise.
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Likewise.
* config/i386/i386.md (UNSPEC_LDQQU): Renamed to ...
(UNSPEC_LDDQU): This.
(UNSPEC_PSHUFB): New.
(UNSPEC_PSIGN): Likewise.
(UNSPEC_PALIGNR): Likewise.
Include mmx.md before sse.md.
* config/i386/i386.opt: Add -mmni.
* config/i386/sse.md (sse3_lddqu): Updated.
(mni_phaddwv8hi3): New pattern for MNI.
(mni_phaddwv4hi3): Likewise.
(mni_phadddv4si3): Likewise.
(mni_phadddv2si3): Likewise.
(mni_phaddswv8hi3): Likewise.
(mni_phaddswv4hi3): Likewise.
(mni_phsubwv8hi3): Likewise.
(mni_phsubwv4hi3): Likewise.
(mni_phsubdv4si3): Likewise.
(mni_phsubdv2si3): Likewise.
(mni_phsubswv8hi3): Likewise.
(mni_phsubswv4hi3): Likewise.
(mni_pmaddubswv8hi3): Likewise.
(mni_pmaddubswv4hi3): Likewise.
(mni_pmulhrswv8hi3): Likewise.
(mni_pmulhrswv4hi3): Likewise.
(mni_pshufbv16qi3): Likewise.
(mni_pshufbv8qi3): Likewise.
(mni_psign<mode>3): Likewise.
(mni_psign<mode>3): Likewise.
(mni_palignrti): Likewise.
(mni_palignrdi): Likewise.
(mni_pabs<mode>2): Likewise.
(mni_pabs<mode>2): Likewise.
* config/i386/tmmintrin.h: New file.
* doc/extend.texi: Document MNI built-in functions.
* doc/invoke.texi: Document -mmni/-mno-mni switches.
--- gcc/config.gcc.mni 2006-02-18 07:12:03.000000000 -0800
+++ gcc/config.gcc 2006-02-24 11:27:59.000000000 -0800
@@ -263,11 +263,13 @@ xscale-*-*)
;;
i[34567]86-*-*)
cpu_type=i386
- extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
+ extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
+ pmmintrin.h tmmintrin.h"
;;
x86_64-*-*)
cpu_type=i386
- extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
+ extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
+ pmmintrin.h tmmintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
--- gcc/config/i386/i386.c.mni 2006-02-24 11:27:59.000000000 -0800
+++ gcc/config/i386/i386.c 2006-02-24 11:27:59.000000000 -0800
@@ -1460,11 +1460,12 @@ override_options (void)
PTA_SSE = 1,
PTA_SSE2 = 2,
PTA_SSE3 = 4,
- PTA_MMX = 8,
- PTA_PREFETCH_SSE = 16,
- PTA_3DNOW = 32,
- PTA_3DNOW_A = 64,
- PTA_64BIT = 128
+ PTA_MNI = 8,
+ PTA_MMX = 16,
+ PTA_PREFETCH_SSE = 32,
+ PTA_3DNOW = 64,
+ PTA_3DNOW_A = 128,
+ PTA_64BIT = 256
} flags;
}
const processor_alias_table[] =
@@ -1659,6 +1660,9 @@ override_options (void)
if (processor_alias_table[i].flags & PTA_SSE3
&& !(target_flags_explicit & MASK_SSE3))
target_flags |= MASK_SSE3;
+ if (processor_alias_table[i].flags & PTA_MNI
+ && !(target_flags_explicit & MASK_MNI))
+ target_flags |= MASK_MNI;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
@@ -1851,6 +1855,10 @@ override_options (void)
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
+ /* Turn on SSE3 builtins for -mmni. */
+ if (TARGET_MNI)
+ target_flags |= MASK_SSE3;
+
/* Turn on SSE2 builtins for -msse3. */
if (TARGET_SSE3)
target_flags |= MASK_SSE2;
@@ -14269,6 +14277,41 @@ enum ix86_builtins
IX86_BUILTIN_MONITOR,
IX86_BUILTIN_MWAIT,
+ /* Merom New Instructions. */
+ IX86_BUILTIN_PHADDW,
+ IX86_BUILTIN_PHADDD,
+ IX86_BUILTIN_PHADDSW,
+ IX86_BUILTIN_PHSUBW,
+ IX86_BUILTIN_PHSUBD,
+ IX86_BUILTIN_PHSUBSW,
+ IX86_BUILTIN_PMADDUBSW,
+ IX86_BUILTIN_PMULHRSW,
+ IX86_BUILTIN_PSHUFB,
+ IX86_BUILTIN_PSIGNB,
+ IX86_BUILTIN_PSIGNW,
+ IX86_BUILTIN_PSIGND,
+ IX86_BUILTIN_PALIGNR,
+ IX86_BUILTIN_PABSB,
+ IX86_BUILTIN_PABSW,
+ IX86_BUILTIN_PABSD,
+
+ IX86_BUILTIN_PHADDW128,
+ IX86_BUILTIN_PHADDD128,
+ IX86_BUILTIN_PHADDSW128,
+ IX86_BUILTIN_PHSUBW128,
+ IX86_BUILTIN_PHSUBD128,
+ IX86_BUILTIN_PHSUBSW128,
+ IX86_BUILTIN_PMADDUBSW128,
+ IX86_BUILTIN_PMULHRSW128,
+ IX86_BUILTIN_PSHUFB128,
+ IX86_BUILTIN_PSIGNB128,
+ IX86_BUILTIN_PSIGNW128,
+ IX86_BUILTIN_PSIGND128,
+ IX86_BUILTIN_PALIGNR128,
+ IX86_BUILTIN_PABSB128,
+ IX86_BUILTIN_PABSW128,
+ IX86_BUILTIN_PABSD128,
+
IX86_BUILTIN_VEC_INIT_V2SI,
IX86_BUILTIN_VEC_INIT_V4HI,
IX86_BUILTIN_VEC_INIT_V8QI,
@@ -14631,7 +14674,33 @@ static const struct builtin_description
{ MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
+ { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
+
+ /* MNI MMX */
+ { MASK_MNI, CODE_FOR_mni_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
@@ -14676,8 +14745,16 @@ static const struct builtin_description
{ MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
/* SSE3 */
- { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
+
+ /* MNI */
+ { MASK_MNI, CODE_FOR_mni_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
+ { MASK_MNI, CODE_FOR_mni_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }
};
static void
@@ -14824,6 +14901,16 @@ ix86_init_mmx_sse_builtins (void)
/* Normal vector unops. */
tree v4sf_ftype_v4sf
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi
+ = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si
+ = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi
+ = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi
+ = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
/* Normal vector binops. */
tree v4sf_ftype_v4sf_v4sf
@@ -14843,6 +14930,12 @@ ix86_init_mmx_sse_builtins (void)
long_long_unsigned_type_node,
long_long_unsigned_type_node, NULL_TREE);
+ tree di_ftype_di_di_int
+ = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ integer_type_node, NULL_TREE);
+
tree v2si_ftype_v2sf
= build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
tree v2sf_ftype_v2si
@@ -14944,6 +15037,9 @@ ix86_init_mmx_sse_builtins (void)
tree v2di_ftype_v2di_int
= build_function_type_list (V2DI_type_node,
V2DI_type_node, integer_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di_int
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
tree v4si_ftype_v4si_int
= build_function_type_list (V4SI_type_node,
V4SI_type_node, integer_type_node, NULL_TREE);
@@ -15066,6 +15162,50 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (d->mask, d->name, type, d->code);
}
+ /* Add all builtins that are more or less simple operations on 1 operand. */
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ {
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf;
+ break;
+ case V8QImode:
+ type = v8qi_ftype_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si;
+ break;
+
+ default:
+ abort ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
@@ -15268,15 +15408,15 @@ ix86_init_mmx_sse_builtins (void)
void_ftype_unsigned_unsigned,
IX86_BUILTIN_MWAIT);
}
- def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
- v4sf_ftype_v4sf,
- IX86_BUILTIN_MOVSHDUP);
- def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
- v4sf_ftype_v4sf,
- IX86_BUILTIN_MOVSLDUP);
def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
+ /* Merom New Instructions. */
+ def_builtin (MASK_MNI, "__builtin_ia32_palignr128",
+ v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
+ def_builtin (MASK_MNI, "__builtin_ia32_palignr", di_ftype_di_di_int,
+ IX86_BUILTIN_PALIGNR);
+
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
@@ -15831,7 +15971,7 @@ ix86_expand_builtin (tree exp, rtx targe
tree arglist = TREE_OPERAND (exp, 1);
tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
- enum machine_mode tmode, mode0, mode1, mode2;
+ enum machine_mode tmode, mode0, mode1, mode2, mode3;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
switch (fcode)
@@ -16208,6 +16348,52 @@ ix86_expand_builtin (tree exp, rtx targe
return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
target, 1);
+ case IX86_BUILTIN_PALIGNR:
+ case IX86_BUILTIN_PALIGNR128:
+ if (fcode == IX86_BUILTIN_PALIGNR)
+ {
+ icode = CODE_FOR_mni_palignrdi;
+ mode = DImode;
+ }
+ else
+ {
+ icode = CODE_FOR_mni_palignrti;
+ mode = V2DImode;
+ }
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (mode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
+ op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:
--- gcc/config/i386/i386.h.mni 2006-01-31 09:02:32.000000000 -0800
+++ gcc/config/i386/i386.h 2006-02-24 11:27:59.000000000 -0800
@@ -387,6 +387,8 @@ extern int x86_prefetch_sse;
builtin_define ("__SSE2__"); \
if (TARGET_SSE3) \
builtin_define ("__SSE3__"); \
+ if (TARGET_MNI) \
+ builtin_define ("__MNI__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \
--- gcc/config/i386/i386.md.mni 2006-02-06 16:56:16.000000000 -0800
+++ gcc/config/i386/i386.md 2006-02-24 11:27:59.000000000 -0800
@@ -104,7 +104,7 @@
(UNSPEC_MFENCE 44)
(UNSPEC_LFENCE 45)
(UNSPEC_PSADBW 46)
- (UNSPEC_LDQQU 47)
+ (UNSPEC_LDDQU 47)
; Generic math support
(UNSPEC_COPYSIGN 50)
@@ -148,6 +148,11 @@
(UNSPEC_SP_TEST 101)
(UNSPEC_SP_TLS_SET 102)
(UNSPEC_SP_TLS_TEST 103)
+
+ ; MNI
+ (UNSPEC_PSHUFB 120)
+ (UNSPEC_PSIGN 121)
+ (UNSPEC_PALIGNR 122)
])
(define_constants
@@ -20476,6 +20481,6 @@
"mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}"
[(set_attr "type" "multi")])
-(include "sse.md")
(include "mmx.md")
+(include "sse.md")
(include "sync.md")
--- gcc/config/i386/i386.opt.mni 2006-01-31 09:02:32.000000000 -0800
+++ gcc/config/i386/i386.opt 2006-02-24 11:27:59.000000000 -0800
@@ -197,6 +197,10 @@ msse3
Target Report Mask(SSE3)
Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
+mmni
+Target Report Mask(MNI)
+Support MMX, SSE, SSE2, SSE3 and MNI built-in functions and code generation
+
msseregparm
Target RejectNegative Mask(SSEREGPARM)
Use SSE register passing conventions for SF and DF mode
--- gcc/config/i386/sse.md.mni 2006-02-24 11:27:59.000000000 -0800
+++ gcc/config/i386/sse.md 2006-02-24 11:27:59.000000000 -0800
@@ -259,7 +259,7 @@
(define_insn "sse3_lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
- UNSPEC_LDQQU))]
+ UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
@@ -4004,3 +4004,578 @@
;; "monitor\t%0, %1, %2"
"monitor"
[(set_attr "length" "3")])
+
+;; MNI
+(define_insn "mni_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_MNI"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phaddwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phadddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_MNI"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_MNI"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phaddswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_MNI"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phsubwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phsubdv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_MNI"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_MNI"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_phsubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_MNI"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_pmaddubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V16QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V16QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_MNI"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_pmaddubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_plus:V4HI
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_MNI"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_MNI && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 14))
+ (const_vector:V4HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_MNI && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_MNI"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSHUFB))]
+ "TARGET_MNI"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_MNI"
+ "psign<ssevecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_psign<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSIGN))]
+ "TARGET_MNI"
+ "psign<mmxvecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_MNI"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_palignrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_MNI"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mni_pabs<mode>2"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
+ "TARGET_MNI"
+ "pabs<ssevecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI")])
+
+(define_insn "mni_pabs<mode>2"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_MNI"
+ "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DI")])
--- gcc/config/i386/tmmintrin.h.mni 2006-02-24 11:27:59.000000000 -0800
+++ gcc/config/i386/tmmintrin.h 2006-02-24 12:26:08.000000000 -0800
@@ -0,0 +1,224 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.1. */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+#ifdef __MNI__
+#include <pmmintrin.h>
+
+static __inline __m128i
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m128i
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+static __inline __m64
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m128i
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m128i
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+static __inline __m64
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m128i
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m64
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
+
+static __inline __m128i
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m64
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m128i
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m64
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+static __inline __m128i
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+static __inline __m128i
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+static __inline __m128i
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+static __inline __m64
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+static __inline __m64
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+static __inline __m64
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+#define _mm_alignr_epi8(__X, __Y, __N) \
+ ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8))
+
+#define _mm_alignr_pi8(__X, __Y, __N) \
+ ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
+
+static __inline __m128i
+_mm_abs_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+static __inline __m128i
+_mm_abs_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+static __inline __m128i
+_mm_abs_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+static __inline __m64
+_mm_abs_pi8 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+static __inline __m64
+_mm_abs_pi16 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+static __inline __m64
+_mm_abs_pi32 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+#endif /* __MNI__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
--- gcc/doc/extend.texi.mni 2006-02-15 06:29:14.000000000 -0800
+++ gcc/doc/extend.texi 2006-02-24 12:33:35.000000000 -0800
@@ -6766,6 +6766,52 @@ The following built-in functions are ava
Generates the @code{movddup} machine instruction as a load from memory.
@end table
+The following built-in functions are available when @option{-mmni} is used.
+All of them generate the machine instruction that is part of the name
+with MMX registers.
+
+@smallexample
+v2si __builtin_ia32_phaddd (v2si, v2si)
+v4hi __builtin_ia32_phaddw (v4hi, v4hi)
+v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
+v2si __builtin_ia32_phsubd (v2si, v2si)
+v4hi __builtin_ia32_phsubw (v4hi, v4hi)
+v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
+v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
+v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
+v8qi __builtin_ia32_pshufb (v8qi, v8qi)
+v8qi __builtin_ia32_psignb (v8qi, v8qi)
+v2si __builtin_ia32_psignd (v2si, v2si)
+v4hi __builtin_ia32_psignw (v4hi, v4hi)
+long long __builtin_ia32_palignr (long long, long long, int)
+v8qi __builtin_ia32_pabsb (v8qi)
+v2si __builtin_ia32_pabsd (v2si)
+v4hi __builtin_ia32_pabsw (v4hi)
+@end smallexample
+
+The following built-in functions are available when @option{-mmni} is used.
+All of them generate the machine instruction that is part of the name
+with SSE registers.
+
+@smallexample
+v4si __builtin_ia32_phaddd128 (v4si, v4si)
+v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
+v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
+v4si __builtin_ia32_phsubd128 (v4si, v4si)
+v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
+v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
+v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
+v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
+v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
+v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
+v4si __builtin_ia32_psignd128 (v4si, v4si)
+v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
+v2di __builtin_ia32_palignr (v2di, v2di, int)
+v16qi __builtin_ia32_pabsb128 (v16qi)
+v4si __builtin_ia32_pabsd128 (v4si)
+v8hi __builtin_ia32_pabsw128 (v8hi)
+@end smallexample
+
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.
--- gcc/doc/invoke.texi.mni 2006-02-21 09:29:35.000000000 -0800
+++ gcc/doc/invoke.texi 2006-02-24 11:51:42.000000000 -0800
@@ -531,7 +531,7 @@ Objective-C and Objective-C++ Dialects}.
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
--mmmx -msse -msse2 -msse3 -m3dnow -msselibm @gol
+-mmmx -msse -msse2 -msse3 -mmni -m3dnow -msselibm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
@@ -9457,6 +9457,8 @@ preferred alignment to @option{-mpreferr
@itemx -mno-sse2
@item -msse3
@itemx -mno-sse3
+@item -mmni
+@itemx -mno-mni
@item -m3dnow
@itemx -mno-3dnow
@opindex mmmx
More information about the Gcc-patches
mailing list