[PATCH] Add constant folding for _mm*movemask* intrinsics (PR target/85317)
Jakub Jelinek
jakub@redhat.com
Mon May 7 08:20:00 GMT 2018
Hi!
The following patch handles constant folding of the builtins used in
*movemask* intrinsics - they have a single operand, and the only useful folding
seems to be: if the argument is a VECTOR_CST, we can do what the instruction
would do on that input and return the resulting INTEGER_CST.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2018-05-06 Jakub Jelinek <jakub@redhat.com>
PR target/85317
* config/i386/i386.c (ix86_fold_builtin): Handle
IX86_BUILTIN_{,P}MOVMSK{PS,PD,B}{,128,256}.
* gcc.target/i386/pr85317.c: New test.
* gcc.target/i386/avx2-vpmovmskb-2.c (avx2_test): Add asm volatile
optimization barrier to avoid optimizing away the expected insn.
--- gcc/config/i386/i386.c.jj 2018-05-01 12:18:01.256814924 +0200
+++ gcc/config/i386/i386.c 2018-05-06 13:56:18.647354123 +0200
@@ -33477,6 +33477,37 @@ ix86_fold_builtin (tree fndecl, int n_ar
}
break;
+ case IX86_BUILTIN_MOVMSKPS:
+ case IX86_BUILTIN_PMOVMSKB:
+ case IX86_BUILTIN_MOVMSKPD:
+ case IX86_BUILTIN_PMOVMSKB128:
+ case IX86_BUILTIN_MOVMSKPD256:
+ case IX86_BUILTIN_MOVMSKPS256:
+ case IX86_BUILTIN_PMOVMSKB256:
+ gcc_assert (n_args == 1);
+ if (TREE_CODE (args[0]) == VECTOR_CST)
+ {
+ HOST_WIDE_INT res = 0;
+ for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
+ {
+ tree e = VECTOR_CST_ELT (args[0], i);
+ if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
+ {
+ if (wi::neg_p (wi::to_wide (e)))
+ res |= HOST_WIDE_INT_1 << i;
+ }
+ else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
+ {
+ if (TREE_REAL_CST (e).sign)
+ res |= HOST_WIDE_INT_1 << i;
+ }
+ else
+ return NULL_TREE;
+ }
+ return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
+ }
+ break;
+
default:
break;
}
--- gcc/testsuite/gcc.target/i386/pr85317.c.jj 2018-05-06 14:28:59.201755826 +0200
+++ gcc/testsuite/gcc.target/i386/pr85317.c 2018-05-06 14:28:47.165747887 +0200
@@ -0,0 +1,38 @@
+/* PR target/85317 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+
+#include <x86intrin.h>
+
+extern void link_error (void);
+
+int
+main ()
+{
+ int a = _mm_movemask_pd (_mm_set_pd (-2.0, 2.0));
+ if (a != 2) link_error ();
+ int b = _mm_movemask_pd (_mm_set_pd (0.0, __builtin_copysign (0.0, -4.0)));
+ if (b != 1) link_error ();
+ int c = _mm_movemask_ps (_mm_set_ps (__builtin_copysignf (0.0f, -4.0f), 0.0f,
+ -4.0f, 4.0f));
+ if (c != 10) link_error ();
+ int d = _mm_movemask_epi8 (_mm_set_epi8 (-4, 8, -8, -12, 12, 15, 0, -1, -3,
+ -128, 127, 126, 120, -120, 0, 5));
+ if (d != 0xb1c4) link_error ();
+ int e = _mm256_movemask_pd (_mm256_set_pd (-4.0, 0.0, 4.0,
+ __builtin_copysign (0.0, -4.0)));
+ if (e != 9) link_error ();
+ int f = _mm256_movemask_ps (_mm256_set_ps (-8.0f, -16.0f, 12.0f, 127.0f,
+ -4.0f, 0.0f, 4.0f,
+ __builtin_copysign (0.0f,
+ -4.0f)));
+ if (f != 0xc9) link_error ();
+ int g = _mm256_movemask_epi8 (_mm256_set_epi8 (-4, 8, -8, -12, 12, 15, 0, -1,
+ -3, -128, 127, 126, 120, -120,
+ 0, 5, 12, 100, -20, -50, -70,
+ 2, -65, 0, -1, 1, 2, -2, -9,
+ -9, 19, -64));
+ if (g != (int) 0xb1c43a9dU) link_error ();
+ return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx2-vpmovmskb-2.c.jj 2016-05-22 12:20:18.514840455 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-vpmovmskb-2.c 2018-05-07 09:16:22.766301274 +0200
@@ -14,6 +14,7 @@ avx2_test (void)
s.x = _mm256_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90, -80, -40, -100,
15, 98, 25, 98, 7, 1, 2, 3, 4, 10, 20, 30, 90,
-80, -40, -100, -15, 98, 25, 98, 7);
+ __asm volatile ("" : "+m" (s) : : "memory");
res = _mm256_movemask_epi8 (s.x);
Jakub
More information about the Gcc-patches
mailing list