This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
[RFC PATCH] Add __attribute__((__artificial__)) (__nodebug__ alternative)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Daniel Berlin <dberlin at dberlin dot org>, Daniel Jacobowitz <drow at false dot org>, Jan Kratochvil <jan dot kratochvil at redhat dot com>, Roland McGrath <roland at redhat dot com>
- Date: Fri, 31 Aug 2007 15:05:20 -0400
- Subject: [RFC PATCH] Add __attribute__((__artificial__)) (__nodebug__ alternative)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
While I think the http://gcc.gnu.org/ml/gcc-patches/2005-07/msg01969.html
patch was fine and very useful, not just for *mmintrin.h but
e.g. for the glibc -D_FORTIFY_SOURCE wrappers, from what I gathered
from the lengthy thread, others disagree.
To show what I care about, consider e.g.:
#include <stddef.h>
#include <signal.h>
#include <unistd.h>

extern void *memcpy (void *__restrict, __const void *__restrict, size_t);
extern inline
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
void *
memcpy (void *__restrict __dest, __const void *__restrict __src, size_t __len)
{
  return __builtin___memcpy_chk (__dest, __src, __len,
                                 __builtin_object_size (__dest, 0));
}

char buf[10];
volatile int l0;

void
abrt (int x)
{
  sleep (1200);
}

void
__attribute__((used))
foo (void)
{
  memcpy (buf, "abcdefghij", 11);
}

int
main (void)
{
  struct sigaction sa;
  sa.sa_handler = abrt;
  sigemptyset (&sa.sa_mask);
  sa.sa_flags = 0;
  sigaction (SIGABRT, &sa, NULL);
  memcpy (buf, "abcdefghijklmnopq", l0 + 11);
  return 0;
}
The memcpy inline with -D_FORTIFY_SOURCE is just an implementation
detail and shouldn't bother the user too much.  He is really interested
in knowing where he overflowed the buffer and where he calls memcpy from.
Currently, the above will generate a compile-time warning about the buffer
overflow in the wrong spot:
nn.c: In function 'foo':
nn.c:12: warning: call to __builtin___memcpy_chk will always overflow destination buffer
While the function name is right, the file and line number point into
the glibc headers, so they don't help much in locating the error.
Also, when stepping through this program in gdb, we will hop into the
memcpy inline, which is really uninteresting to the user. I guess
for *mmintrin.h users this is the same.
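For reference, and not part of this patch, the reason the wrapper can
diagnose this at compile time at all is __builtin_object_size, which
tells the wrapper how many bytes the compiler knows are available at
the destination.  A minimal illustration (a hypothetical standalone
example, compile with -O2):

#include <stdio.h>

char buf[10];

int
main (void)
{
  /* The compiler can see that buf is 10 bytes, so the fortified
     memcpy wrapper above can compare __len against this constant.  */
  printf ("%zu\n", __builtin_object_size (buf, 0));
  return 0;
}

This prints 10; when __len is a larger constant, the wrapper's
__builtin___memcpy_chk call is what emits the warning shown above.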
Now, if the opposition to __nodebug__ is really so strong, here is
a patch which tries to implement what has been suggested.
The attribute is called __artificial__ instead; for DWARF2/3 it will set
DW_AT_artificial on the abstract DIE (or should it instead be present
on the inlined instance, where we have the DW_AT_call_{file,line}
attributes?), and for stabs it acts like the old __nodebug__ in the
output: all filenames/line numbers within the inlined fn are those of
the caller of the artificial inline.
Can gdb, frysk, systemtap and other consumers do something useful with it
though (and by default if possible), for backtraces, etc.?
I know the latest frysk can do virtual backtraces from the inlined routines,
so I guess it shouldn't be hard to add there a knob which would say ignore
DW_AT_artificial inlines in the backtraces and set it by default, but
can gdb do something similar easily?
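Just to sketch what I mean by such a knob (this is only an illustration
using elfutils libdw, not something any of those consumers does today,
and the function name is made up), a backtracer could decide to hide an
inlined frame whose abstract origin carries DW_AT_artificial roughly
like this:

#include <elfutils/libdw.h>
#include <dwarf.h>

/* Return nonzero if DIE is an inlined frame that a debugger could
   hide by default because its abstract origin is artificial.  */
static int
inlined_frame_is_artificial (Dwarf_Die *die)
{
  Dwarf_Attribute attr_mem, *attr;
  Dwarf_Die origin_mem, *origin;

  if (dwarf_tag (die) != DW_TAG_inlined_subroutine)
    return 0;

  /* DW_AT_artificial is set on the abstract instance by the
     dwarf2out.c change below, so follow DW_AT_abstract_origin.  */
  attr = dwarf_attr (die, DW_AT_abstract_origin, &attr_mem);
  if (attr == NULL)
    return 0;

  origin = dwarf_formref_die (attr, &origin_mem);
  return origin != NULL && dwarf_hasattr (origin, DW_AT_artificial);
}

Whether gdb can do the equivalent as easily is exactly what I'm asking
about above.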
2007-08-31 Jakub Jelinek <jakub@redhat.com>
* tree.h (block_nonartificial_location): New prototype.
* tree.c (block_nonartificial_location): New function.
* dwarf2out.c (gen_subprogram_die): Add DW_AT_artificial
if artificial attribute is present on abstract inline decl.
* builtins.c (expand_builtin_memory_chk): Use
block_nonartificial_location.
* c-common.c (handle_artificial_attribute): New function.
(c_common_attribute_table): Add artificial attribute.
* final.c (override_filename, override_linenum): New variables.
(final_scan_insn): For DBX_DEBUG or SDB_DEBUG, set override_filename
and override_linenum if inside of a block inlined from
__attribute__((__artificial__)) function.
(notice_source_line): Honor override_filename and override_linenum.
* doc/extend.texi: Document __attribute__((__artificial__)).
* config/i386/emmintrin.h: Add __artificial__ attribute to
all __always_inline__ functions.
* config/i386/mmintrin.h: Likewise.
* config/i386/tmmintrin.h: Likewise.
* config/i386/mm3dnow.h: Likewise.
* config/i386/pmmintrin.h: Likewise.
* config/i386/ammintrin.h: Likewise.
* config/i386/xmmintrin.h: Likewise.
* config/i386/smmintrin.h: Likewise.
* testsuite/gcc.dg/memcpy-chk-1.c: New test.
--- gcc/tree.h.jj 2007-08-30 18:46:59.000000000 +0200
+++ gcc/tree.h 2007-08-31 19:03:30.000000000 +0200
@@ -4864,6 +4864,7 @@ extern int *expr_lineno (const_tree);
extern tree *tree_block (tree);
extern tree *generic_tree_operand (tree, int);
extern tree *generic_tree_type (tree);
+extern location_t *block_nonartificial_location (tree);
/* In function.c */
extern void expand_main_function (void);
--- gcc/tree.c.jj 2007-08-29 16:33:20.000000000 +0200
+++ gcc/tree.c 2007-08-31 19:08:21.000000000 +0200
@@ -8713,4 +8713,36 @@ call_expr_arglist (tree exp)
return arglist;
}
+/* If BLOCK is inlined from an __attribute__((__artificial__))
+ routine, return pointer to location from where it has been
+ called. */
+location_t *
+block_nonartificial_location (tree block)
+{
+ location_t *ret = NULL;
+
+ while (block && TREE_CODE (block) == BLOCK
+ && BLOCK_ABSTRACT_ORIGIN (block))
+ {
+ tree ao = BLOCK_ABSTRACT_ORIGIN (block);
+
+ while (TREE_CODE (ao) == BLOCK && BLOCK_ABSTRACT_ORIGIN (ao))
+ ao = BLOCK_ABSTRACT_ORIGIN (ao);
+
+ if (TREE_CODE (ao) == FUNCTION_DECL)
+ {
+ if (DECL_DECLARED_INLINE_P (ao)
+ && lookup_attribute ("artificial", DECL_ATTRIBUTES (ao)))
+ ret = &BLOCK_SOURCE_LOCATION (block);
+ else
+ break;
+ }
+ else if (TREE_CODE (ao) != BLOCK)
+ break;
+
+ block = BLOCK_SUPERCONTEXT (block);
+ }
+ return ret;
+}
+
#include "gt-tree.h"
--- gcc/dwarf2out.c.jj 2007-08-28 11:42:19.000000000 +0200
+++ gcc/dwarf2out.c 2007-08-31 18:03:20.000000000 +0200
@@ -12191,6 +12191,10 @@ gen_subprogram_die (tree decl, dw_die_re
add_AT_unsigned (subr_die, DW_AT_inline, DW_INL_not_inlined);
}
+ if (DECL_DECLARED_INLINE_P (decl)
+ && lookup_attribute ("artificial", DECL_ATTRIBUTES (decl)))
+ add_AT_flag (subr_die, DW_AT_artificial, 1);
+
equate_decl_number_to_die (decl, subr_die);
}
else if (!DECL_EXTERNAL (decl))
--- gcc/builtins.c.jj 2007-08-31 11:58:02.000000000 +0200
+++ gcc/builtins.c 2007-08-31 19:09:46.000000000 +0200
@@ -11462,7 +11462,15 @@ expand_builtin_memory_chk (tree exp, rtx
if (! integer_all_onesp (size) && tree_int_cst_lt (size, len))
{
- location_t locus = EXPR_LOCATION (exp);
+ location_t locus = EXPR_LOCATION (exp), *locus_ptr;
+
+ /* If locus is inside an __attribute__((__artificial__))
+ inline routine, report instead the locus from where
+ that inline routine has been called. */
+ locus_ptr = block_nonartificial_location (TREE_BLOCK (exp));
+ if (locus_ptr != NULL)
+ locus = *locus_ptr;
+
warning (0, "%Hcall to %D will always overflow destination buffer",
&locus, get_callee_fndecl (exp));
return NULL_RTX;
--- gcc/c-common.c.jj 2007-08-30 10:19:47.000000000 +0200
+++ gcc/c-common.c 2007-08-31 17:52:49.000000000 +0200
@@ -515,8 +515,8 @@ static tree handle_cold_attribute (tree
static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
static tree handle_always_inline_attribute (tree *, tree, tree, int,
bool *);
-static tree handle_gnu_inline_attribute (tree *, tree, tree, int,
- bool *);
+static tree handle_gnu_inline_attribute (tree *, tree, tree, int, bool *);
+static tree handle_artificial_attribute (tree *, tree, tree, int, bool *);
static tree handle_flatten_attribute (tree *, tree, tree, int, bool *);
static tree handle_used_attribute (tree *, tree, tree, int, bool *);
static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
@@ -589,6 +589,8 @@ const struct attribute_spec c_common_att
handle_always_inline_attribute },
{ "gnu_inline", 0, 0, true, false, false,
handle_gnu_inline_attribute },
+ { "artificial", 0, 0, true, false, false,
+ handle_artificial_attribute },
{ "flatten", 0, 0, true, false, false,
handle_flatten_attribute },
{ "used", 0, 0, true, false, false,
@@ -4900,6 +4902,29 @@ handle_gnu_inline_attribute (tree *node,
return NULL_TREE;
}
+/* Handle an "artificial" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+handle_artificial_attribute (tree *node, tree name,
+ tree ARG_UNUSED (args),
+ int ARG_UNUSED (flags),
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (*node))
+ {
+ /* Do nothing else, just set the attribute. We'll get at
+ it later with lookup_attribute. */
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
/* Handle a "flatten" attribute; arguments as in
struct attribute_spec.handler. */
--- gcc/final.c.jj 2007-08-23 23:31:24.000000000 +0200
+++ gcc/final.c 2007-08-31 20:10:18.000000000 +0200
@@ -137,6 +137,10 @@ static int high_function_linenum;
/* Filename of last NOTE. */
static const char *last_filename;
+/* Override filename and line number. */
+static const char *override_filename;
+static int override_linenum;
+
/* Whether to force emission of a line note before the next insn. */
static bool force_source_line = false;
@@ -1813,6 +1817,18 @@ final_scan_insn (rtx insn, FILE *file, i
/* Mark this block as output. */
TREE_ASM_WRITTEN (NOTE_BLOCK (insn)) = 1;
}
+ if (write_symbols == DBX_DEBUG
+ || write_symbols == SDB_DEBUG)
+ {
+ location_t *locus_ptr
+ = block_nonartificial_location (NOTE_BLOCK (insn));
+
+ if (locus_ptr != NULL)
+ {
+ override_filename = LOCATION_FILE (*locus_ptr);
+ override_linenum = LOCATION_LINE (*locus_ptr);
+ }
+ }
break;
case NOTE_INSN_BLOCK_END:
@@ -1832,6 +1848,24 @@ final_scan_insn (rtx insn, FILE *file, i
(*debug_hooks->end_block) (high_block_linenum, n);
}
+ if (write_symbols == DBX_DEBUG
+ || write_symbols == SDB_DEBUG)
+ {
+ tree outer_block = BLOCK_SUPERCONTEXT (NOTE_BLOCK (insn));
+ location_t *locus_ptr
+ = block_nonartificial_location (outer_block);
+
+ if (locus_ptr != NULL)
+ {
+ override_filename = LOCATION_FILE (*locus_ptr);
+ override_linenum = LOCATION_LINE (*locus_ptr);
+ }
+ else
+ {
+ override_filename = NULL;
+ override_linenum = 0;
+ }
+ }
break;
case NOTE_INSN_DELETED_LABEL:
@@ -2606,8 +2640,19 @@ final_scan_insn (rtx insn, FILE *file, i
static bool
notice_source_line (rtx insn)
{
- const char *filename = insn_file (insn);
- int linenum = insn_line (insn);
+ const char *filename;
+ int linenum;
+
+ if (override_filename)
+ {
+ filename = override_filename;
+ linenum = override_linenum;
+ }
+ else
+ {
+ filename = insn_file (insn);
+ linenum = insn_line (insn);
+ }
if (filename
&& (force_source_line
--- gcc/doc/extend.texi.jj 2007-08-31 12:00:19.000000000 +0200
+++ gcc/doc/extend.texi 2007-08-31 20:24:16.000000000 +0200
@@ -1638,8 +1638,8 @@ attributes are currently defined for fun
@code{no_instrument_function}, @code{section}, @code{constructor},
@code{destructor}, @code{used}, @code{unused}, @code{deprecated},
@code{weak}, @code{malloc}, @code{alias}, @code{warn_unused_result},
-@code{nonnull}, @code{gnu_inline} and @code{externally_visible},
-@code{hot}, @code{cold}.
+@code{nonnull}, @code{gnu_inline}, @code{externally_visible},
+@code{hot}, @code{cold} and @code{artificial}.
Several other attributes are defined for functions on particular
target systems. Other attributes, including @code{section} are
supported for variables declarations (@pxref{Variable Attributes}) and
@@ -1761,6 +1761,14 @@ In C++, this attribute does not depend o
but it still requires the @code{inline} keyword to enable its special
behavior.
+@cindex @code{artificial} function attribute
+@item artificial
+This attribute is useful for small inline wrappers which if possible
+should appear during debugging as a unit.  Depending on the debug
+info format it will either mean marking the function as artificial
+or using the caller location for all instructions within the inlined
+body.
+
@cindex @code{flatten} function attribute
@item flatten
Generally, inlining into a function is limited. For a function marked with
--- gcc/testsuite/gcc.dg/memcpy-chk-1.c.jj 2007-08-31 20:38:14.000000000 +0200
+++ gcc/testsuite/gcc.dg/memcpy-chk-1.c 2007-08-31 20:40:01.000000000 +0200
@@ -0,0 +1,34 @@
+/* Verify that the "will always overflow" warning is reported on the caller
+   of an __attribute__((__artificial__)) inline rather than within the
+   inline itself. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+extern void *memcpy (void *__restrict, __const void *__restrict, size_t);
+
+extern inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+void *
+memcpy (void *__restrict __dest, __const void *__restrict __src, size_t __len)
+{
+ return __builtin___memcpy_chk (__dest, __src, __len,
+ __builtin_object_size (__dest, 0));
+}
+
+char buf[10];
+volatile int l0;
+
+void
+__attribute__((used))
+foo (void)
+{
+ memcpy (buf, "abcdefghij", 11); /* { dg-warning "will always overflow" } */
+}
+
+int
+main (void)
+{
+ memcpy (buf, "abcdefghijklmnopq", l0 + 9);
+ return 0;
+}
--- gcc/config/i386/emmintrin.h.jj 2007-05-30 14:54:55.000000000 +0200
+++ gcc/config/i386/emmintrin.h 2007-08-31 20:34:11.000000000 +0200
@@ -54,89 +54,89 @@ typedef double __m128d __attribute__ ((_
(((fp1) << 1) | (fp0))
/* Create a vector with element 0 as F and the rest zero. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_set_sd (double __F)
{
return __extension__ (__m128d){ __F, 0.0 };
}
/* Create a vector with both elements equal to F. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_set1_pd (double __F)
{
return __extension__ (__m128d){ __F, __F };
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_set_pd1 (double __F)
{
return _mm_set1_pd (__F);
}
/* Create a vector with the lower value X and upper value W. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_set_pd (double __W, double __X)
{
return __extension__ (__m128d){ __X, __W };
}
/* Create a vector with the lower value W and upper value X. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_setr_pd (double __W, double __X)
{
return __extension__ (__m128d){ __W, __X };
}
/* Create a vector of zeros. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_setzero_pd (void)
{
return __extension__ (__m128d){ 0.0, 0.0 };
}
/* Sets the low DPFP value of A from the low value of B. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_move_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
}
/* Load two DPFP values from P. The address must be 16-byte aligned. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_load_pd (double const *__P)
{
return *(__m128d *)__P;
}
/* Load two DPFP values from P. The address need not be 16-byte aligned. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_loadu_pd (double const *__P)
{
return __builtin_ia32_loadupd (__P);
}
/* Create a vector with all two elements equal to *P. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_load1_pd (double const *__P)
{
return _mm_set1_pd (*__P);
}
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_load_sd (double const *__P)
{
return _mm_set_sd (*__P);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_load_pd1 (double const *__P)
{
return _mm_load1_pd (__P);
}
/* Load two DPFP values in reverse order. The address must be aligned. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_loadr_pd (double const *__P)
{
__m128d __tmp = _mm_load_pd (__P);
@@ -144,40 +144,40 @@ _mm_loadr_pd (double const *__P)
}
/* Store two DPFP values. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_pd (double *__P, __m128d __A)
{
*(__m128d *)__P = __A;
}
/* Store two DPFP values. The address need not be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storeu_pd (double *__P, __m128d __A)
{
__builtin_ia32_storeupd (__P, __A);
}
/* Stores the lower DPFP value. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_sd (double *__P, __m128d __A)
{
*__P = __builtin_ia32_vec_ext_v2df (__A, 0);
}
-static __inline double __attribute__((__always_inline__))
+static __inline double __attribute__((__always_inline__, __artificial__))
_mm_cvtsd_f64 (__m128d __A)
{
return __builtin_ia32_vec_ext_v2df (__A, 0);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storel_pd (double *__P, __m128d __A)
{
_mm_store_sd (__P, __A);
}
/* Stores the upper DPFP value. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storeh_pd (double *__P, __m128d __A)
{
*__P = __builtin_ia32_vec_ext_v2df (__A, 1);
@@ -185,26 +185,26 @@ _mm_storeh_pd (double *__P, __m128d __A)
/* Store the lower DPFP value across two words.
The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store1_pd (double *__P, __m128d __A)
{
_mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_pd1 (double *__P, __m128d __A)
{
_mm_store1_pd (__P, __A);
}
/* Store two DPFP values in reverse order. The address must be aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storer_pd (double *__P, __m128d __A)
{
_mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvtsi128_si32 (__m128i __A)
{
return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
@@ -212,221 +212,221 @@ _mm_cvtsi128_si32 (__m128i __A)
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtsi128_si64 (__m128i __A)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtsi128_si64x (__m128i __A)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}
#endif
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_add_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_add_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_sub_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_sub_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_mul_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_mul_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_div_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_div_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_sqrt_pd (__m128d __A)
{
return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}
/* Return pair {sqrt (A[0), B[1]}. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_sqrt_sd (__m128d __A, __m128d __B)
{
__v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_min_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_min_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_max_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_max_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_and_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_andnot_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_or_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_xor_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmplt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmple_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpge_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpneq_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnle_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpngt_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnge_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpunord_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmplt_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmple_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
@@ -436,7 +436,7 @@ _mm_cmpgt_sd (__m128d __A, __m128d __B)
__A));
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpge_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
@@ -446,25 +446,25 @@ _mm_cmpge_sd (__m128d __A, __m128d __B)
__A));
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpneq_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnle_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpngt_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
@@ -474,7 +474,7 @@ _mm_cmpngt_sd (__m128d __A, __m128d __B)
__A));
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpnge_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
@@ -484,85 +484,85 @@ _mm_cmpnge_sd (__m128d __A, __m128d __B)
__A));
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpord_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cmpunord_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comieq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comilt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comile_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comigt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comige_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comineq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomieq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomilt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomile_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomigt_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomige_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomineq_sd (__m128d __A, __m128d __B)
{
return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
@@ -570,25 +570,25 @@ _mm_ucomineq_sd (__m128d __A, __m128d __
/* Create a vector of Qi, where i is the element number. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set_epi64x (long long __q1, long long __q0)
{
return __extension__ (__m128i)(__v2di){ __q0, __q1 };
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set_epi64 (__m64 __q1, __m64 __q0)
{
return _mm_set_epi64x ((long long)__q1, (long long)__q0);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
{
return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
short __q3, short __q2, short __q1, short __q0)
{
@@ -596,7 +596,7 @@ _mm_set_epi16 (short __q7, short __q6, s
__q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
char __q11, char __q10, char __q09, char __q08,
char __q07, char __q06, char __q05, char __q04,
@@ -610,31 +610,31 @@ _mm_set_epi8 (char __q15, char __q14, ch
/* Set all of the elements of the vector to A. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set1_epi64x (long long __A)
{
return _mm_set_epi64x (__A, __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set1_epi64 (__m64 __A)
{
return _mm_set_epi64 (__A, __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set1_epi32 (int __A)
{
return _mm_set_epi32 (__A, __A, __A, __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set1_epi16 (short __A)
{
return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_set1_epi8 (char __A)
{
return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
@@ -644,26 +644,26 @@ _mm_set1_epi8 (char __A)
/* Create a vector of Qi, where i is the element number.
The parameter order is reversed from the _mm_set_epi* functions. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_setr_epi64 (__m64 __q0, __m64 __q1)
{
return _mm_set_epi64 (__q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
{
return _mm_set_epi32 (__q3, __q2, __q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
short __q4, short __q5, short __q6, short __q7)
{
return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
char __q04, char __q05, char __q06, char __q07,
char __q08, char __q09, char __q10, char __q11,
@@ -675,134 +675,134 @@ _mm_setr_epi8 (char __q00, char __q01, c
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_load_si128 (__m128i const *__P)
{
return *__P;
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_loadu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_loadl_epi64 (__m128i const *__P)
{
return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_si128 (__m128i *__P, __m128i __B)
{
*__P = __B;
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storeu_si128 (__m128i *__P, __m128i __B)
{
__builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storel_epi64 (__m128i *__P, __m128i __B)
{
*(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_movepi64_pi64 (__m128i __B)
{
return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_movpi64_epi64 (__m64 __A)
{
return _mm_set_epi64 ((__m64)0LL, __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_move_epi64 (__m128i __A)
{
return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
}
/* Create a vector of zeros. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_setzero_si128 (void)
{
return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtepi32_pd (__m128i __A)
{
return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtepi32_ps (__m128i __A)
{
return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtpd_epi32 (__m128d __A)
{
return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtpd_pi32 (__m128d __A)
{
return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpd_ps (__m128d __A)
{
return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvttpd_epi32 (__m128d __A)
{
return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvttpd_pi32 (__m128d __A)
{
return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtpi32_pd (__m64 __A)
{
return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtps_epi32 (__m128 __A)
{
return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvttps_epi32 (__m128 __A)
{
return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtps_pd (__m128 __A)
{
return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvtsd_si32 (__m128d __A)
{
return __builtin_ia32_cvtsd2si ((__v2df) __A);
@@ -810,21 +810,21 @@ _mm_cvtsd_si32 (__m128d __A)
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtsd_si64 (__m128d __A)
{
return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtsd_si64x (__m128d __A)
{
return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvttsd_si32 (__m128d __A)
{
return __builtin_ia32_cvttsd2si ((__v2df) __A);
@@ -832,27 +832,27 @@ _mm_cvttsd_si32 (__m128d __A)
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvttsd_si64 (__m128d __A)
{
return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvttsd_si64x (__m128d __A)
{
return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
@@ -860,28 +860,28 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
/* Microsoft intrinsic. */
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
@@ -891,242 +891,242 @@ _mm_shuffle_pd(__m128d __A, __m128d __B,
((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))
#endif
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
return __builtin_ia32_movmskpd ((__v2df)__A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_add_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_add_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_add_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_add_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_mul_su32 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_slli_epi16 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_slli_epi32 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_slli_epi64 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
@@ -1141,13 +1141,13 @@ _mm_slli_epi64 (__m128i __A, const int _
#endif
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srai_epi16 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srai_epi32 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
@@ -1160,13 +1160,13 @@ _mm_srai_epi32 (__m128i __A, const int _
#endif
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8);
@@ -1179,19 +1179,19 @@ _mm_slli_si128 (__m128i __A, const int _
#endif
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srli_epi16 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srli_epi32 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srli_epi64 (__m128i __A, const int __B)
{
return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
@@ -1205,140 +1205,140 @@ _mm_srli_epi64 (__m128i __A, const int _
((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B))
#endif
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
}
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
@@ -1350,56 +1350,56 @@ _mm_insert_epi16 (__m128i const __A, int
((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N)))
#endif
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epi16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_movemask_epi8 (__m128i __A)
{
return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
@@ -1413,67 +1413,67 @@ _mm_shuffle_epi32 (__m128i __A, const in
((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
#endif
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
{
__builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_avg_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_avg_epu16 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sad_epu8 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
__builtin_ia32_movnti (__A, __B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
__builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_pd (double *__A, __m128d __B)
{
__builtin_ia32_movntpd (__A, (__v2df)__B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
__builtin_ia32_clflush (__A);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_lfence (void)
{
__builtin_ia32_lfence ();
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_mfence (void)
{
__builtin_ia32_mfence ();
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtsi32_si128 (int __A)
{
return _mm_set_epi32 (0, 0, 0, __A);
@@ -1481,14 +1481,14 @@ _mm_cvtsi32_si128 (int __A)
#ifdef __x86_64__
/* Intel intrinsic. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_si128 (long long __A)
{
return _mm_set_epi64x (0, __A);
}
/* Microsoft intrinsic. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64x_si128 (long long __A)
{
return _mm_set_epi64x (0, __A);
@@ -1497,37 +1497,37 @@ _mm_cvtsi64x_si128 (long long __A)
/* Casts between various SP, DP, INT vector types. Note that these do no
conversion of values, they just change the type. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)
{
return (__m128) __A;
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)
{
return (__m128i) __A;
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)
{
return (__m128d) __A;
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)
{
return (__m128i) __A;
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)
{
return (__m128) __A;
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)
{
return (__m128d) __A;
--- gcc/config/i386/mmintrin.h.jj 2007-03-26 15:03:13.000000000 +0200
+++ gcc/config/i386/mmintrin.h 2007-08-31 20:32:24.000000000 +0200
@@ -1,5 +1,5 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
- Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software
+ Foundation, Inc.
This file is part of GCC.
@@ -44,26 +44,26 @@ typedef short __v4hi __attribute__ ((__v
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_empty (void)
{
__builtin_ia32_emms ();
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_m_empty (void)
{
_mm_empty ();
}
/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_from_int (int __i)
{
return _mm_cvtsi32_si64 (__i);
@@ -73,26 +73,26 @@ _m_from_int (int __i)
/* Convert I to a __m64 object. */
/* Intel intrinsic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_from_int64 (long long __i)
{
return (__m64) __i;
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{
return (__m64) __i;
}
/* Microsoft intrinsic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{
return (__m64) __i;
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{
return (__m64) __i;
@@ -100,13 +100,13 @@ _mm_set_pi64x (long long __i)
#endif
/* Convert the lower 32 bits of the __m64 object into an integer. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{
return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
return _mm_cvtsi64_si32 (__i);
@@ -116,20 +116,20 @@ _m_to_int (__m64 __i)
/* Convert the __m64 object to a 64bit integer. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{
return (long long)__i;
}
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{
return (long long)__i;
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{
return (long long)__i;
@@ -139,13 +139,13 @@ _mm_cvtsi64_si64x (__m64 __i)
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with signed saturation. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi16 (__m1, __m2);
@@ -154,13 +154,13 @@ _m_packsswb (__m64 __m1, __m64 __m2)
/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
the result, and the two 32-bit values from M2 into the upper two 16-bit
values of the result, all with signed saturation. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi32 (__m1, __m2);
@@ -169,13 +169,13 @@ _m_packssdw (__m64 __m1, __m64 __m2)
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with unsigned saturation. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pu16 (__m1, __m2);
@@ -183,13 +183,13 @@ _m_packuswb (__m64 __m1, __m64 __m2)
/* Interleave the four 8-bit values from the high half of M1 with the four
8-bit values from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi8 (__m1, __m2);
@@ -197,13 +197,13 @@ _m_punpckhbw (__m64 __m1, __m64 __m2)
/* Interleave the two 16-bit values from the high half of M1 with the two
16-bit values from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi16 (__m1, __m2);
@@ -211,13 +211,13 @@ _m_punpckhwd (__m64 __m1, __m64 __m2)
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
value from the high half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi32 (__m1, __m2);
@@ -225,13 +225,13 @@ _m_punpckhdq (__m64 __m1, __m64 __m2)
/* Interleave the four 8-bit values from the low half of M1 with the four
8-bit values from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpcklbw (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi8 (__m1, __m2);
@@ -239,13 +239,13 @@ _m_punpcklbw (__m64 __m1, __m64 __m2)
/* Interleave the two 16-bit values from the low half of M1 with the two
16-bit values from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpcklwd (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi16 (__m1, __m2);
@@ -253,52 +253,52 @@ _m_punpcklwd (__m64 __m1, __m64 __m2)
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
value from the low half of M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_punpckldq (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi32 (__m1, __m2);
}
/* Add the 8-bit values in M1 to the 8-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_add_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddb (__m64 __m1, __m64 __m2)
{
return _mm_add_pi8 (__m1, __m2);
}
/* Add the 16-bit values in M1 to the 16-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddw (__m64 __m1, __m64 __m2)
{
return _mm_add_pi16 (__m1, __m2);
}
/* Add the 32-bit values in M1 to the 32-bit values in M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddd (__m64 __m1, __m64 __m2)
{
return _mm_add_pi32 (__m1, __m2);
@@ -306,7 +306,7 @@ _m_paddd (__m64 __m1, __m64 __m2)
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
#ifdef __SSE2__
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
@@ -315,13 +315,13 @@ _mm_add_si64 (__m64 __m1, __m64 __m2)
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddsb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi8 (__m1, __m2);
@@ -329,13 +329,13 @@ _m_paddsb (__m64 __m1, __m64 __m2)
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi16 (__m1, __m2);
@@ -343,13 +343,13 @@ _m_paddsw (__m64 __m1, __m64 __m2)
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu8 (__m1, __m2);
@@ -357,52 +357,52 @@ _m_paddusb (__m64 __m1, __m64 __m2)
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
saturated arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu16 (__m1, __m2);
}
/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi8 (__m1, __m2);
}
/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi16 (__m1, __m2);
}
/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi32 (__m1, __m2);
@@ -410,7 +410,7 @@ _m_psubd (__m64 __m1, __m64 __m2)
/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
#ifdef __SSE2__
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
@@ -419,13 +419,13 @@ _mm_sub_si64 (__m64 __m1, __m64 __m2)
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi8 (__m1, __m2);
@@ -433,13 +433,13 @@ _m_psubsb (__m64 __m1, __m64 __m2)
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
signed saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubsw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi16 (__m1, __m2);
@@ -447,13 +447,13 @@ _m_psubsw (__m64 __m1, __m64 __m2)
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
unsigned saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubusb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu8 (__m1, __m2);
@@ -461,13 +461,13 @@ _m_psubusb (__m64 __m1, __m64 __m2)
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
unsigned saturating arithmetic. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psubusw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu16 (__m1, __m2);
@@ -476,13 +476,13 @@ _m_psubusw (__m64 __m1, __m64 __m2)
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
four 32-bit intermediate results, which are then summed by pairs to
produce two 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmaddwd (__m64 __m1, __m64 __m2)
{
return _mm_madd_pi16 (__m1, __m2);
@@ -490,13 +490,13 @@ _m_pmaddwd (__m64 __m1, __m64 __m2)
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
M2 and produce the high 16 bits of the 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmulhw (__m64 __m1, __m64 __m2)
{
return _mm_mulhi_pi16 (__m1, __m2);
@@ -504,226 +504,226 @@ _m_pmulhw (__m64 __m1, __m64 __m2)
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
the low 16 bits of the results. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmullw (__m64 __m1, __m64 __m2)
{
return _mm_mullo_pi16 (__m1, __m2);
}
/* Shift four 16-bit values in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psllw (__m64 __m, __m64 __count)
{
return _mm_sll_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psllwi (__m64 __m, int __count)
{
return _mm_slli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pslld (__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pslldi (__m64 __m, int __count)
{
return _mm_slli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M left by COUNT. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psllq (__m64 __m, __m64 __count)
{
return _mm_sll_si64 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psllqi (__m64 __m, int __count)
{
return _mm_slli_si64 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psraw (__m64 __m, __m64 __count)
{
return _mm_sra_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrawi (__m64 __m, int __count)
{
return _mm_srai_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
return _mm_sra_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
return _mm_srai_pi32 (__m, __count);
}
/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
return _mm_srl_pi16 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
return _mm_srli_pi16 (__m, __count);
}
/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
return _mm_srl_pi32 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
return _mm_srli_pi32 (__m, __count);
}
/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
return _mm_srl_si64 (__m, __count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
return _mm_srli_si64 (__m, __count);
}
/* Bit-wise AND the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pand (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pand (__m64 __m1, __m64 __m2)
{
return _mm_and_si64 (__m1, __m2);
@@ -731,39 +731,39 @@ _m_pand (__m64 __m1, __m64 __m2)
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
64-bit value in M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pandn (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pandn (__m64 __m1, __m64 __m2)
{
return _mm_andnot_si64 (__m1, __m2);
}
/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_por (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_por (__m64 __m1, __m64 __m2)
{
return _mm_or_si64 (__m1, __m2);
}
/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pxor (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pxor (__m64 __m1, __m64 __m2)
{
return _mm_xor_si64 (__m1, __m2);
@@ -771,25 +771,25 @@ _m_pxor (__m64 __m1, __m64 __m2)
/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpeqb (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi8 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpgtb (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi8 (__m1, __m2);
@@ -797,25 +797,25 @@ _m_pcmpgtb (__m64 __m1, __m64 __m2)
/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
the test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpeqw (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi16 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpgtw (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi16 (__m1, __m2);
@@ -823,53 +823,53 @@ _m_pcmpgtw (__m64 __m1, __m64 __m2)
/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
the test is true and zero if false. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpeqd (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi32 (__m1, __m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pcmpgtd (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi32 (__m1, __m2);
}
/* Creates a 64-bit zero. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
return (__m64)0LL;
}
/* Creates a vector of two 32-bit values; I0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}
/* Creates a vector of four 16-bit values; W0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}
/* Creates a vector of eight 8-bit values; B0 is least significant. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
char __b3, char __b2, char __b1, char __b0)
{
@@ -878,19 +878,19 @@ _mm_set_pi8 (char __b7, char __b6, char
}
/* Similar, but with the arguments in reverse order. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_setr_pi32 (int __i0, int __i1)
{
return _mm_set_pi32 (__i1, __i0);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
char __b4, char __b5, char __b6, char __b7)
{
@@ -898,21 +898,21 @@ _mm_setr_pi8 (char __b0, char __b1, char
}
/* Creates a vector of two 32-bit values, both elements containing I. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set1_pi32 (int __i)
{
return _mm_set_pi32 (__i, __i);
}
/* Creates a vector of four 16-bit values, all elements containing W. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set1_pi16 (short __w)
{
return _mm_set_pi16 (__w, __w, __w, __w);
}
/* Creates a vector of eight 8-bit values, all elements containing B. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_set1_pi8 (char __b)
{
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
--- gcc/config/i386/tmmintrin.h.jj 2007-05-30 14:54:55.000000000 +0200
+++ gcc/config/i386/tmmintrin.h 2007-08-31 20:34:25.000000000 +0200
@@ -37,157 +37,157 @@
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __X, __m64 __Y)
{
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
{
return (__m128i)__builtin_ia32_palignr128 ((__v2di)__X, (__v2di)__Y, __N * 8);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
{
return (__m64)__builtin_ia32_palignr ((long long)__X, (long long)__Y, __N * 8);
@@ -199,37 +199,37 @@ _mm_alignr_pi8(__m64 __X, __m64 __Y, con
((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
#endif
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __X)
{
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
--- gcc/config/i386/mm3dnow.h.jj 2007-05-21 16:34:56.000000000 +0200
+++ gcc/config/i386/mm3dnow.h 2007-08-31 20:36:26.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2004, 2007 Free Software Foundation, Inc.
This file is part of GCC.
@@ -37,145 +37,145 @@
/* Internal data types for implementing the intrinsics. */
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_m_femms (void)
{
__builtin_ia32_femms();
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pavgusb (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pf2id (__m64 __A)
{
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfadd (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfcmpeq (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfcmpge (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfcmpgt (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfmax (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfmin (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfmul (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfrcp (__m64 __A)
{
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfrcpit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfrcpit2 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfrsqrt (__m64 __A)
{
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfrsqit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfsub (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfsubr (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pi2fd (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmulhrw (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_m_prefetch (void *__P)
{
__builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_from_float (float __A)
{
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
}
-static __inline float __attribute__((__always_inline__))
+static __inline float __attribute__((__always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
union { __v2sf v; float a[2]; } __tmp;
@@ -185,31 +185,31 @@ _m_to_float (__m64 __A)
#ifdef __3dNOW_A__
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pf2iw (__m64 __A)
{
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pfpnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pi2fw (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pswapd (__m64 __A)
{
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
--- gcc/config/i386/pmmintrin.h.jj 2007-05-21 16:34:56.000000000 +0200
+++ gcc/config/i386/pmmintrin.h 2007-08-31 20:35:36.000000000 +0200
@@ -47,79 +47,79 @@
#define _MM_GET_DENORMALS_ZERO_MODE() \
(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
return _mm_load1_pd (__P);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
__builtin_ia32_monitor (__P, __E, __H);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
__builtin_ia32_mwait (__E, __H);
--- gcc/config/i386/ammintrin.h.jj 2007-05-30 14:54:55.000000000 +0200
+++ gcc/config/i386/ammintrin.h 2007-08-31 20:35:52.000000000 +0200
@@ -37,26 +37,26 @@
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_sd (double * __P, __m128d __Y)
{
__builtin_ia32_movntsd (__P, (__v2df) __Y);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_ss (float * __P, __m128 __Y)
{
__builtin_ia32_movntss (__P, (__v4sf) __Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_extract_si64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
@@ -66,14 +66,14 @@ _mm_extracti_si64 (__m128i __X, unsigned
((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
#endif
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_insert_si64 (__m128i __X,__m128i __Y)
{
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
}
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
--- gcc/config/i386/xmmintrin.h.jj 2007-05-30 14:54:55.000000000 +0200
+++ gcc/config/i386/xmmintrin.h 2007-08-31 20:33:49.000000000 +0200
@@ -89,7 +89,7 @@ enum _mm_hint
#define _MM_FLUSH_ZERO_OFF 0x0000
/* Create a vector of zeros. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_setzero_ps (void)
{
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
@@ -99,55 +99,55 @@ _mm_setzero_ps (void)
floating-point) values of A and B; the upper three SPFP values are
passed through from A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_add_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_sub_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_mul_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_div_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_sqrt_ss (__m128 __A)
{
return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_rcp_ss (__m128 __A)
{
return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_rsqrt_ss (__m128 __A)
{
return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_min_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_max_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
@@ -155,55 +155,55 @@ _mm_max_ss (__m128 __A, __m128 __B)
/* Perform the respective operation on the four SPFP values in A and B. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_add_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_sub_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_mul_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_div_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_sqrt_ps (__m128 __A)
{
return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_rcp_ps (__m128 __A)
{
return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_rsqrt_ps (__m128 __A)
{
return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_min_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_max_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
@@ -211,25 +211,25 @@ _mm_max_ps (__m128 __A, __m128 __B)
/* Perform logical bit-wise operations on 128-bit values. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_and_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_andps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_andnot_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_andnps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_or_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_orps (__A, __B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_xor_ps (__m128 __A, __m128 __B)
{
return __builtin_ia32_xorps (__A, __B);
@@ -239,25 +239,25 @@ _mm_xor_ps (__m128 __A, __m128 __B)
comparison is true, place a mask of all ones in the result, otherwise a
mask of zeros. The upper three SPFP values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmplt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmple_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
@@ -267,7 +267,7 @@ _mm_cmpgt_ss (__m128 __A, __m128 __B)
__A));
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpge_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
@@ -277,25 +277,25 @@ _mm_cmpge_ss (__m128 __A, __m128 __B)
__A));
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpneq_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnlt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnle_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpngt_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
@@ -305,7 +305,7 @@ _mm_cmpngt_ss (__m128 __A, __m128 __B)
__A));
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnge_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf) __A,
@@ -315,13 +315,13 @@ _mm_cmpnge_ss (__m128 __A, __m128 __B)
__A));
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpord_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpunord_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
@@ -331,73 +331,73 @@ _mm_cmpunord_ss (__m128 __A, __m128 __B)
element, if the comparison is true, place a mask of all ones in the
result, otherwise a mask of zeros. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmplt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmple_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpge_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpneq_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnlt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnle_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpngt_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpnge_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpord_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cmpunord_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
@@ -406,73 +406,73 @@ _mm_cmpunord_ps (__m128 __A, __m128 __B)
/* Compare the lower SPFP values of A and B and return 1 if true
and 0 if false. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comieq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comilt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comile_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comigt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comige_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_comineq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomieq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomilt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomile_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomigt_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomige_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_ucomineq_ss (__m128 __A, __m128 __B)
{
return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
@@ -480,13 +480,13 @@ _mm_ucomineq_ss (__m128 __A, __m128 __B)
/* Convert the lower SPFP value to a 32-bit integer according to the current
rounding mode. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvtss_si32 (__m128 __A)
{
return __builtin_ia32_cvtss2si ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvt_ss2si (__m128 __A)
{
return _mm_cvtss_si32 (__A);
@@ -497,14 +497,14 @@ _mm_cvt_ss2si (__m128 __A)
current rounding mode. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtss_si64 (__m128 __A)
{
return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvtss_si64x (__m128 __A)
{
return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
@@ -513,26 +513,26 @@ _mm_cvtss_si64x (__m128 __A)
/* Convert the two lower SPFP values to 32-bit integers according to the
current rounding mode. Return the integers in packed form. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtps_pi32 (__m128 __A)
{
return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvt_ps2pi (__m128 __A)
{
return _mm_cvtps_pi32 (__A);
}
/* Truncate the lower SPFP value to a 32-bit integer. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvttss_si32 (__m128 __A)
{
return __builtin_ia32_cvttss2si ((__v4sf) __A);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cvtt_ss2si (__m128 __A)
{
return _mm_cvttss_si32 (__A);
@@ -542,14 +542,14 @@ _mm_cvtt_ss2si (__m128 __A)
/* Truncate the lower SPFP value to a 32-bit integer. */
/* Intel intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvttss_si64 (__m128 __A)
{
return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
}
/* Microsoft intrinsic. */
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_cvttss_si64x (__m128 __A)
{
return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
@@ -558,26 +558,26 @@ _mm_cvttss_si64x (__m128 __A)
/* Truncate the two lower SPFP values to 32-bit integers. Return the
integers in packed form. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvttps_pi32 (__m128 __A)
{
return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtt_ps2pi (__m128 __A)
{
return _mm_cvttps_pi32 (__A);
}
/* Convert B to a SPFP value and insert it as element zero in A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi32_ss (__m128 __A, int __B)
{
return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvt_si2ss (__m128 __A, int __B)
{
return _mm_cvtsi32_ss (__A, __B);
@@ -587,14 +587,14 @@ _mm_cvt_si2ss (__m128 __A, int __B)
/* Convert B to a SPFP value and insert it as element zero in A. */
/* Intel intrinsic. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64_ss (__m128 __A, long long __B)
{
return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
}
/* Microsoft intrinsic. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtsi64x_ss (__m128 __A, long long __B)
{
return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
@@ -603,20 +603,20 @@ _mm_cvtsi64x_ss (__m128 __A, long long _
/* Convert the two 32-bit values in B to SPFP form and insert them
as the two lower elements in A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpi32_ps (__m128 __A, __m64 __B)
{
return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvt_pi2ps (__m128 __A, __m64 __B)
{
return _mm_cvtpi32_ps (__A, __B);
}
/* Convert the four signed 16-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpi16_ps (__m64 __A)
{
__v4hi __sign;
@@ -642,7 +642,7 @@ _mm_cvtpi16_ps (__m64 __A)
}
/* Convert the four unsigned 16-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpu16_ps (__m64 __A)
{
__v2si __hisi, __losi;
@@ -662,7 +662,7 @@ _mm_cvtpu16_ps (__m64 __A)
}
/* Convert the low four signed 8-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpi8_ps (__m64 __A)
{
__v8qi __sign;
@@ -679,7 +679,7 @@ _mm_cvtpi8_ps (__m64 __A)
}
/* Convert the low four unsigned 8-bit values in A to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpu8_ps(__m64 __A)
{
__A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
@@ -687,7 +687,7 @@ _mm_cvtpu8_ps(__m64 __A)
}
/* Convert the four signed 32-bit values in A and B to SPFP form. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
{
__v4sf __zero = (__v4sf) _mm_setzero_ps ();
@@ -697,7 +697,7 @@ _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
}
/* Convert the four SPFP values in A to four signed 16-bit integers. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtps_pi16(__m128 __A)
{
__v4sf __hisf = (__v4sf)__A;
@@ -708,7 +708,7 @@ _mm_cvtps_pi16(__m128 __A)
}
/* Convert the four SPFP values in A to four signed 8-bit integers. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_cvtps_pi8(__m128 __A)
{
__v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
@@ -717,7 +717,7 @@ _mm_cvtps_pi8(__m128 __A)
/* Selects four specific SPFP values from A and B based on MASK. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
{
return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
@@ -728,14 +728,14 @@ _mm_shuffle_ps (__m128 __A, __m128 __B,
#endif
/* Selects and interleaves the upper two SPFP values from A and B. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_unpackhi_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
}
/* Selects and interleaves the lower two SPFP values from A and B. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_unpacklo_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
@@ -743,28 +743,28 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B)
/* Sets the upper two SPFP values with 64-bits of data loaded from P;
the lower two values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{
return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
}
/* Stores the upper two SPFP values of A into P. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storeh_pi (__m64 *__P, __m128 __A)
{
__builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
}
/* Moves the upper two values of B into the lower two values of A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_movehl_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
}
/* Moves the lower two values of B into the upper two values of A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_movelh_ps (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
@@ -772,146 +772,146 @@ _mm_movelh_ps (__m128 __A, __m128 __B)
/* Sets the lower two SPFP values with 64-bits of data loaded from P;
the upper two values are passed through from A. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{
return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
}
/* Stores the lower two SPFP values of A into P. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storel_pi (__m64 *__P, __m128 __A)
{
__builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
}
/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_movemask_ps (__m128 __A)
{
return __builtin_ia32_movmskps ((__v4sf)__A);
}
/* Return the contents of the control register. */
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_mm_getcsr (void)
{
return __builtin_ia32_stmxcsr ();
}
/* Read exception bits from the control register. */
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_MM_GET_EXCEPTION_STATE (void)
{
return _mm_getcsr() & _MM_EXCEPT_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_MM_GET_EXCEPTION_MASK (void)
{
return _mm_getcsr() & _MM_MASK_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_MM_GET_ROUNDING_MODE (void)
{
return _mm_getcsr() & _MM_ROUND_MASK;
}
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_MM_GET_FLUSH_ZERO_MODE (void)
{
return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
}
/* Set the control register to I. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_setcsr (unsigned int __I)
{
__builtin_ia32_ldmxcsr (__I);
}
/* Set exception bits in the control register. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_MM_SET_EXCEPTION_STATE(unsigned int __mask)
{
_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_MM_SET_EXCEPTION_MASK (unsigned int __mask)
{
_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_MM_SET_ROUNDING_MODE (unsigned int __mode)
{
_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
{
_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
}
/* Create a vector with element 0 as F and the rest zero. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_set_ss (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
}
/* Create a vector with all four elements equal to F. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_set1_ps (float __F)
{
return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_set_ps1 (float __F)
{
return _mm_set1_ps (__F);
}
/* Create a vector with element 0 as *P and the rest zero. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_load_ss (float const *__P)
{
return _mm_set_ss (*__P);
}
/* Create a vector with all four elements equal to *P. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_load1_ps (float const *__P)
{
return _mm_set1_ps (*__P);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_load_ps1 (float const *__P)
{
return _mm_load1_ps (__P);
}
/* Load four SPFP values from P. The address must be 16-byte aligned. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_load_ps (float const *__P)
{
return (__m128) *(__v4sf *)__P;
}
/* Load four SPFP values from P. The address need not be 16-byte aligned. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_loadu_ps (float const *__P)
{
return (__m128) __builtin_ia32_loadups (__P);
}
/* Load four SPFP values in reverse order. The address must be aligned. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_loadr_ps (float const *__P)
{
__v4sf __tmp = *(__v4sf *)__P;
@@ -919,48 +919,48 @@ _mm_loadr_ps (float const *__P)
}
/* Create the vector [Z Y X W]. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
{
return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
}
/* Create the vector [W X Y Z]. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
}
/* Stores the lower SPFP value. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_ss (float *__P, __m128 __A)
{
*__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}
-static __inline float __attribute__((__always_inline__))
+static __inline float __attribute__((__always_inline__, __artificial__))
_mm_cvtss_f32 (__m128 __A)
{
return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}
/* Store four SPFP values. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_ps (float *__P, __m128 __A)
{
*(__v4sf *)__P = (__v4sf)__A;
}
/* Store four SPFP values. The address need not be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storeu_ps (float *__P, __m128 __A)
{
__builtin_ia32_storeups (__P, (__v4sf)__A);
}
/* Store the lower SPFP value across four words. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store1_ps (float *__P, __m128 __A)
{
__v4sf __va = (__v4sf)__A;
@@ -968,14 +968,14 @@ _mm_store1_ps (float *__P, __m128 __A)
_mm_storeu_ps (__P, __tmp);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_store_ps1 (float *__P, __m128 __A)
{
_mm_store1_ps (__P, __A);
}
/* Store four SPFP values in reverse order. The address must be aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_storer_ps (float *__P, __m128 __A)
{
__v4sf __va = (__v4sf)__A;
@@ -984,7 +984,7 @@ _mm_storer_ps (float *__P, __m128 __A)
}
/* Sets the low SPFP value of A from the low value of B. */
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_move_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
@@ -992,13 +992,13 @@ _mm_move_ss (__m128 __A, __m128 __B)
/* Extracts one of the four words of A. The selector N must be immediate. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_extract_pi16 (__m64 const __A, int const __N)
{
return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_m_pextrw (__m64 const __A, int const __N)
{
return _mm_extract_pi16 (__A, __N);
@@ -1011,13 +1011,13 @@ _m_pextrw (__m64 const __A, int const __
/* Inserts word D into one of four words of A. The selector N must be
immediate. */
#ifdef __OPTIMIZE__
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
{
return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pinsrw (__m64 const __A, int const __D, int const __N)
{
return _mm_insert_pi16 (__A, __D, __N);
@@ -1029,65 +1029,65 @@ _m_pinsrw (__m64 const __A, int const __
#endif
/* Compute the element-wise maximum of signed 16-bit values. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_max_pi16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmaxsw (__m64 __A, __m64 __B)
{
return _mm_max_pi16 (__A, __B);
}
/* Compute the element-wise maximum of unsigned 8-bit values. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_max_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmaxub (__m64 __A, __m64 __B)
{
return _mm_max_pu8 (__A, __B);
}
/* Compute the element-wise minimum of signed 16-bit values. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_min_pi16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pminsw (__m64 __A, __m64 __B)
{
return _mm_min_pi16 (__A, __B);
}
/* Compute the element-wise minimum of unsigned 8-bit values. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_min_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pminub (__m64 __A, __m64 __B)
{
return _mm_min_pu8 (__A, __B);
}
/* Create an 8-bit mask of the signs of 8-bit values. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
return __builtin_ia32_pmovmskb ((__v8qi)__A);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_m_pmovmskb (__m64 __A)
{
return _mm_movemask_pi8 (__A);
@@ -1095,13 +1095,13 @@ _m_pmovmskb (__m64 __A)
/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
in B and produce the high 16 bits of the 32-bit results. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pmulhuw (__m64 __A, __m64 __B)
{
return _mm_mulhi_pu16 (__A, __B);
@@ -1110,13 +1110,13 @@ _m_pmulhuw (__m64 __A, __m64 __B)
/* Return a combination of the four 16-bit values in A. The selector
must be an immediate. */
#ifdef __OPTIMIZE__
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int const __N)
{
return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pshufw (__m64 __A, int const __N)
{
return _mm_shuffle_pi16 (__A, __N);
@@ -1130,39 +1130,39 @@ _m_pshufw (__m64 __A, int const __N)
/* Conditionally store byte elements of A into P. The high bit of each
byte in the selector N determines whether the corresponding byte from
A is stored. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
__builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
}
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_m_maskmovq (__m64 __A, __m64 __N, char *__P)
{
_mm_maskmove_si64 (__A, __N, __P);
}
/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_avg_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pavgb (__m64 __A, __m64 __B)
{
return _mm_avg_pu8 (__A, __B);
}
/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_avg_pu16 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_pavgw (__m64 __A, __m64 __B)
{
return _mm_avg_pu16 (__A, __B);
@@ -1171,13 +1171,13 @@ _m_pavgw (__m64 __A, __m64 __B)
/* Compute the sum of the absolute differences of the unsigned 8-bit
values in A and B. Return the value in the lower 16-bit word; the
upper words are cleared. */
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_mm_sad_pu8 (__m64 __A, __m64 __B)
{
return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
}
-static __inline __m64 __attribute__((__always_inline__))
+static __inline __m64 __attribute__((__always_inline__, __artificial__))
_m_psadbw (__m64 __A, __m64 __B)
{
return _mm_sad_pu8 (__A, __B);
@@ -1186,7 +1186,7 @@ _m_psadbw (__m64 __A, __m64 __B)
/* Loads one cache line from address P to a location "closer" to the
processor. The selector I specifies the type of prefetch operation. */
#ifdef __OPTIMIZE__
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_prefetch (void *__P, enum _mm_hint __I)
{
__builtin_prefetch (__P, 0, __I);
@@ -1197,14 +1197,14 @@ _mm_prefetch (void *__P, enum _mm_hint _
#endif
/* Stores the data in A to the address P without polluting the caches. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_pi (__m64 *__P, __m64 __A)
{
__builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
}
/* Likewise. The address must be 16-byte aligned. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_stream_ps (float *__P, __m128 __A)
{
__builtin_ia32_movntps (__P, (__v4sf)__A);
@@ -1212,7 +1212,7 @@ _mm_stream_ps (float *__P, __m128 __A)
/* Guarantees that every preceding store is globally visible before
any subsequent store. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_sfence (void)
{
__builtin_ia32_sfence ();
@@ -1221,7 +1221,7 @@ _mm_sfence (void)
/* The execution of the next instruction is delayed by an implementation
specific amount of time. The instruction does not modify the
architectural state. */
-static __inline void __attribute__((__always_inline__))
+static __inline void __attribute__((__always_inline__, __artificial__))
_mm_pause (void)
{
__asm__ __volatile__ ("rep; nop" : : );
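(Not part of the patch -- just a sketch of how the xmmintrin.h change above
can be exercised.  The file name test.c and the function bar are made up for
illustration.  With the patched header, _mm_add_ps is an always-inline marked
__artificial__, so the intent is that its abstract DIE gets DW_AT_artificial
and debug consumers can attribute the inlined lines back to bar's caller
rather than to the header.)
#include <xmmintrin.h>
__m128
bar (__m128 a, __m128 b)
{
  /* Inlined wrapper that the patch marks __artificial__.  */
  return _mm_add_ps (a, b);
}
int
main (void)
{
  __m128 x = _mm_set_ps (1.0f, 2.0f, 3.0f, 4.0f);
  __m128 y = _mm_set1_ps (0.5f);
  volatile __m128 z = bar (x, y);
  (void) z;
  return 0;
}
Compiling with e.g. gcc -O2 -g -msse test.c and dumping the debug info
(readelf -wi a.out) should show the difference on the inlined-subroutine
DIEs with and without the attribute; whether gdb then skips the wrapper by
default is exactly the open question above.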
--- gcc/config/i386/smmintrin.h.jj 2007-06-06 12:54:36.000000000 +0200
+++ gcc/config/i386/smmintrin.h 2007-08-31 20:36:00.000000000 +0200
@@ -67,7 +67,7 @@
constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
@@ -79,7 +79,7 @@ _mm_blend_epi16 (__m128i __X, __m128i __
((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M)))
#endif
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
{
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
@@ -91,7 +91,7 @@ _mm_blendv_epi8 (__m128i __X, __m128i __
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
@@ -103,7 +103,7 @@ _mm_blend_ps (__m128 __X, __m128 __Y, co
((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M)))
#endif
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
{
return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
@@ -115,7 +115,7 @@ _mm_blendv_ps (__m128 __X, __m128 __Y, _
from 2 sources using constant/variable mask. */
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
@@ -127,7 +127,7 @@ _mm_blend_pd (__m128d __X, __m128d __Y,
((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M)))
#endif
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
{
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
@@ -139,7 +139,7 @@ _mm_blendv_pd (__m128d __X, __m128d __Y,
of result. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
{
return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
@@ -147,7 +147,7 @@ _mm_dp_ps (__m128 __X, __m128 __Y, const
__M);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
{
return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
@@ -164,7 +164,7 @@ _mm_dp_pd (__m128d __X, __m128d __Y, con
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
@@ -172,49 +172,49 @@ _mm_cmpeq_epi64 (__m128i __X, __m128i __
/* Min/max packed integer instructions. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
@@ -222,7 +222,7 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
/* Packed integer 32-bit multiplication with truncation of upper
halves of results. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
@@ -230,7 +230,7 @@ _mm_mullo_epi32 (__m128i __X, __m128i __
/* Packed integer 32-bit multiplication of 2 pairs of operands
with two 64-bit results. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
@@ -238,7 +238,7 @@ _mm_mul_epi32 (__m128i __X, __m128i __Y)
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) == 0. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_testz_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
@@ -246,7 +246,7 @@ _mm_testz_si128 (__m128i __M, __m128i __
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & ~__M) == 0. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_testc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
@@ -254,7 +254,7 @@ _mm_testc_si128 (__m128i __M, __m128i __
/* Packed integer 128-bit bitwise comparison. Return 1 if
(__V & __M) != 0 && (__V & ~__M) != 0. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __M, __m128i __V)
{
return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
@@ -274,7 +274,7 @@ _mm_testnzc_si128 (__m128i __M, __m128i
zeroing mask for D. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
{
return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
@@ -293,7 +293,7 @@ _mm_insert_ps (__m128 __D, __m128 __S, c
single precision array element of X selected by index N. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
union { int i; float f; } __tmp;
@@ -327,14 +327,14 @@ _mm_extract_ps (__m128 __X, const int __
selected by index N. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_insert_epi8 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
__S, __N);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_insert_epi32 (__m128i __D, int __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
@@ -342,7 +342,7 @@ _mm_insert_epi32 (__m128i __D, int __S,
}
#ifdef __x86_64__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
{
return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
@@ -366,20 +366,20 @@ _mm_insert_epi64 (__m128i __D, long long
index N. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
}
#ifdef __x86_64__
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
@@ -399,7 +399,7 @@ _mm_extract_epi64 (__m128i __X, const in
/* Return horizontal packed word minimum and its index in bits [15:0]
and bits [18:16] respectively. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __X)
{
return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
@@ -408,13 +408,13 @@ _mm_minpos_epu16 (__m128i __X)
/* Packed/scalar double precision floating point rounding. */
#ifdef __OPTIMIZE__
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_round_pd (__m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__, __artificial__))
_mm_round_sd(__m128d __D, __m128d __V, const int __M)
{
return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
@@ -432,13 +432,13 @@ _mm_round_sd(__m128d __D, __m128d __V, c
/* Packed/scalar single precision floating point rounding. */
#ifdef __OPTIMIZE__
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_round_ps (__m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__, __artificial__))
_mm_round_ss (__m128 __D, __m128 __V, const int __M)
{
return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
@@ -468,37 +468,37 @@ _mm_round_ss (__m128 __D, __m128 __V, co
/* Packed integer sign-extension. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
@@ -506,37 +506,37 @@ _mm_cvtepi8_epi16 (__m128i __X)
/* Packed integer zero-extension. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __X)
{
return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
@@ -544,7 +544,7 @@ _mm_cvtepu8_epi16 (__m128i __X)
/* Pack 8 double words from 2 operands into 8 words of result with
unsigned saturation. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
@@ -555,7 +555,7 @@ _mm_packus_epi32 (__m128i __X, __m128i _
operands are determined by the 3rd mask operand. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
@@ -567,7 +567,7 @@ _mm_mpsadbw_epu8 (__m128i __X, __m128i _
#endif
/* Load double quadword using non-temporal aligned hint. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_stream_load_si128 (__m128i *__X)
{
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
@@ -604,7 +604,7 @@ _mm_stream_load_si128 (__m128i *__X)
/* Intrinsics for text/string processing. */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
@@ -612,7 +612,7 @@ _mm_cmpistrm (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
@@ -620,7 +620,7 @@ _mm_cmpistri (__m128i __X, __m128i __Y,
__M);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
@@ -628,7 +628,7 @@ _mm_cmpestrm (__m128i __X, int __LX, __m
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
@@ -653,7 +653,7 @@ _mm_cmpestri (__m128i __X, int __LX, __m
EFlags. */
#ifdef __OPTIMIZE__
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
@@ -661,7 +661,7 @@ _mm_cmpistra (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
@@ -669,7 +669,7 @@ _mm_cmpistrc (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
@@ -677,7 +677,7 @@ _mm_cmpistro (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
@@ -685,7 +685,7 @@ _mm_cmpistrs (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
{
return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
@@ -693,7 +693,7 @@ _mm_cmpistrz (__m128i __X, __m128i __Y,
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
@@ -701,7 +701,7 @@ _mm_cmpestra (__m128i __X, int __LX, __m
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
@@ -709,7 +709,7 @@ _mm_cmpestrc (__m128i __X, int __LX, __m
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
@@ -717,7 +717,7 @@ _mm_cmpestro (__m128i __X, int __LX, __m
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
@@ -725,7 +725,7 @@ _mm_cmpestrs (__m128i __X, int __LX, __m
__M);
}
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
@@ -763,21 +763,21 @@ _mm_cmpestrz (__m128i __X, int __LX, __m
/* Packed integer 64-bit comparison, zeroing or filling with ones
corresponding parts of result. */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
/* Calculate a number of bits set to 1. */
-static __inline int __attribute__((__always_inline__))
+static __inline int __attribute__((__always_inline__, __artificial__))
_mm_popcnt_u32 (unsigned int __X)
{
return __builtin_popcount (__X);
}
#ifdef __x86_64__
-static __inline long long __attribute__((__always_inline__))
+static __inline long long __attribute__((__always_inline__, __artificial__))
_mm_popcnt_u64 (unsigned long long __X)
{
return __builtin_popcountll (__X);
@@ -785,26 +785,26 @@ _mm_popcnt_u64 (unsigned long long __X)
#endif
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
return __builtin_ia32_crc32qi (__C, __V);
}
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
return __builtin_ia32_crc32hi (__C, __V);
}
-static __inline unsigned int __attribute__((__always_inline__))
+static __inline unsigned int __attribute__((__always_inline__, __artificial__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
#ifdef __x86_64__
-static __inline unsigned long long __attribute__((__always_inline__))
+static __inline unsigned long long __attribute__((__always_inline__, __artificial__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V);
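
For illustration, here is a minimal, hypothetical test case (the file name
and flags below are my own, not part of the patch) showing what the
attribute is meant to buy for the intrinsics headers: with __artificial__
on the always_inline wrappers, the inlined body of e.g. _mm_popcnt_u32
should carry the caller's file/line in the line table, so stepping or a
backtrace stays in bar () instead of descending into smmintrin.h, while
the abstract inline DIE gets DW_AT_artificial.

/* artificial-test.c - hypothetical example, not part of the patch.
   Build (assumed flags): gcc -O2 -g -msse4.2 artificial-test.c
   With __artificial__ on the _mm_popcnt_u32 wrapper, "step" in gdb
   should stay on the bar () line rather than jump into smmintrin.h.  */
#include <smmintrin.h>
#include <stdio.h>

int
bar (unsigned int x)
{
  /* Expands to __builtin_popcount (x) via the always_inline wrapper.  */
  return _mm_popcnt_u32 (x);
}

int
main (void)
{
  printf ("%d\n", bar (0xdeadbeefU));
  return 0;
}
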
Jakub