[PATCH] Enable vectorization of sqrt and lrint on i?86 and x86_64
Richard Guenther
rguenther@suse.de
Tue Nov 21 23:30:00 GMT 2006
This enables the i386 backend to vectorize sqrt and lrint using SSE
intrinsics by implementing the builtin_vectorized_function target hook.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
This patch depends on
[PATCH] (2/3) Add vectorization of builtin functions
http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01144.html
Ok for mainline?
Thanks,
Richard.
2006-11-18 Richard Guenther <rguenther@suse.de>
* config/i386/i386.c (ix86_builtin_vectorized_function): Declare.
(TARGET_BUILTIN_VECTORIZED_FUNCTION): Define.
(ix86_builtin_vectorized_function): New function to vectorize
lrint and sqrt.
* gcc.target/i386/vectorize2.c: New testcase.
* gcc.target/i386/vectorize2.c: Likewise.
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c 2006-11-21 22:17:19.000000000 +0100
+++ gcc/config/i386/i386.c 2006-11-21 22:21:46.000000000 +0100
@@ -1275,6 +1275,7 @@
tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
@@ -1339,6 +1340,8 @@
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+#undef TARGET_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
@@ -16866,6 +16869,53 @@
gcc_unreachable ();
}
+/* Returns a function decl for a vectorized version of the builtin function
+ with builtin function code FN and the result vector type TYPE, or NULL_TREE
+ if it is not available. */
+
+static tree
+ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
+{
+ enum machine_mode el_mode;
+ int n;
+
+ if (TREE_CODE (type) != VECTOR_TYPE)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type));
+ n = TYPE_VECTOR_SUBPARTS (type);
+
+ switch (fn)
+ {
+ case BUILT_IN_LRINT:
+ case BUILT_IN_LLRINT:
+ if (el_mode == SImode && n == 2)
+ return ix86_builtins[IX86_BUILTIN_CVTPD2DQ];
+ return NULL_TREE;
+
+ case BUILT_IN_LRINTF:
+ case BUILT_IN_LLRINTF:
+ if (el_mode == SImode && n == 4)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ return NULL_TREE;
+
+ case BUILT_IN_SQRT:
+ if (el_mode == DFmode && n == 2)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD];
+ return NULL_TREE;
+
+ case BUILT_IN_SQRTF:
+ if (el_mode == SFmode && n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS];
+ return NULL_TREE;
+
+ default:
+ ;
+ }
+
+ return NULL_TREE;
+}
+
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
rtx
Index: gcc/testsuite/gcc.target/i386/vectorize2.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/gcc.target/i386/vectorize2.c 2006-11-21 22:21:46.000000000 +0100
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -fdump-tree-vect-details" } */
+
+double x[256];
+float y[256];
+
+void test1 (void)
+{
+ int j;
+ for (j=0; j<256; ++j)
+ x[j] = __builtin_sqrt (x[j]);
+}
+
+void test2 (void)
+{
+ int j;
+ for (j=0; j<256; ++j)
+ y[j] = __builtin_sqrtf (y[j]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.target/i386/vectorize3.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/gcc.target/i386/vectorize3.c 2006-11-21 22:21:46.000000000 +0100
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -fdump-tree-vect-details" } */
+
+double x[256];
+float y[256];
+int a[256];
+
+void test1 (void)
+{
+ int i;
+ for (i=0; i<256; ++i)
+ a[i] = __builtin_lrintf (y[i]);
+}
+
+void test2 (void)
+{
+ int i;
+ for (i=0; i<256; ++i)
+ a[i] = __builtin_lrint (x[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } { xfail "x86_64-*-*" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
More information about the Gcc-patches
mailing list