[PATCH] Use *blendv* for sse_movcc/vcond/vcondu if possible
Jakub Jelinek
jakub@redhat.com
Wed Sep 21 15:12:00 GMT 2011
On Wed, Sep 21, 2011 at 07:25:43AM -0700, Richard Henderson wrote:
> On 09/21/2011 04:37 AM, Jakub Jelinek wrote:
> > op_false = force_reg (mode, op_false);
> > + switch (mode)
> > + {
> > + case V4SFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvps; break;
> > + case V2DFmode: if (TARGET_SSE4_1) gen = gen_sse4_1_blendvpd; break;
> > + case V16QImode: if (TARGET_SSE4_1) gen = gen_sse4_1_pblendvb; break;
> > + case V8SFmode: if (TARGET_AVX) gen = gen_avx_blendvps256; break;
> > + case V4DFmode: if (TARGET_AVX) gen = gen_avx_blendvpd256; break;
> > + case V32QImode: if (TARGET_AVX2) gen = gen_avx2_pblendvb; break;
> > + default: break;
> > + }
>
> pblendvb is applicable to all of the integer modes, not just QImode.
> You do have to frob the modes around, but it'll work.
Good idea. So like this instead?
2011-09-21  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_sse_movcc): Use
	blendvps, blendvpd and pblendvb if possible.

	* gcc.dg/vect/vect-cond-7.c: New test.
	* gcc.target/i386/sse4_1-cond-1.c: New test.
	* gcc.target/i386/avx-cond-1.c: New test.
--- gcc/config/i386/i386.c.jj 2011-09-21 16:32:10.878449912 +0200
+++ gcc/config/i386/i386.c 2011-09-21 16:37:46.531420718 +0200
@@ -18905,24 +18905,80 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
}
else
{
- op_true = force_reg (mode, op_true);
+ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+
op_false = force_reg (mode, op_false);
- t2 = gen_reg_rtx (mode);
- if (optimize)
- t3 = gen_reg_rtx (mode);
+ switch (mode)
+ {
+ case V4SFmode:
+ if (TARGET_SSE4_1)
+ gen = gen_sse4_1_blendvps;
+ break;
+ case V2DFmode:
+ if (TARGET_SSE4_1)
+ gen = gen_sse4_1_blendvpd;
+ break;
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_SSE4_1)
+ {
+ gen = gen_sse4_1_pblendvb;
+ dest = gen_lowpart (V16QImode, dest);
+ op_false = gen_lowpart (V16QImode, op_false);
+ op_true = gen_lowpart (V16QImode, op_true);
+ cmp = gen_lowpart (V16QImode, cmp);
+ }
+ break;
+ case V8SFmode:
+ if (TARGET_AVX)
+ gen = gen_avx_blendvps256;
+ break;
+ case V4DFmode:
+ if (TARGET_AVX)
+ gen = gen_avx_blendvpd256;
+ break;
+ case V32QImode:
+ case V16HImode:
+ case V8SImode:
+ case V4DImode:
+ if (TARGET_AVX2)
+ {
+ gen = gen_avx2_pblendvb;
+ dest = gen_lowpart (V32QImode, dest);
+ op_false = gen_lowpart (V32QImode, op_false);
+ op_true = gen_lowpart (V32QImode, op_true);
+ cmp = gen_lowpart (V32QImode, cmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (gen != NULL)
+ emit_insn (gen (dest, op_false, op_true, cmp));
else
- t3 = dest;
+ {
+ op_true = force_reg (mode, op_true);
- x = gen_rtx_AND (mode, op_true, cmp);
- emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, cmp);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
- x = gen_rtx_NOT (mode, cmp);
- x = gen_rtx_AND (mode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
- x = gen_rtx_IOR (mode, t3, t2);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
}
}
--- gcc/testsuite/gcc.dg/vect/vect-cond-7.c.jj 2011-09-21 16:32:37.227546030 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-7.c 2011-09-21 16:32:37.227546030 +0200
@@ -0,0 +1,68 @@
+#include "tree-vect.h"
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+ int i; \
+ for (i = 0; i < 64; i++) \
+ { \
+ __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+ a##var[i] = b##var[i] > c##var[i] ? d : e; \
+ } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+int
+main ()
+{
+ int i;
+
+ check_vect ();
+ for (i = 0; i < 64; i++)
+ {
+#undef F
+#define F(var) \
+ b##var[i] = i + 64; \
+ switch (i % 3) \
+ { \
+ case 0: c##var[i] = i + 64; break; \
+ case 1: c##var[i] = 127 - i; break; \
+ case 2: c##var[i] = i; break; \
+ } \
+ d##var[i] = i / 2; \
+ e##var[i] = i * 2;
+ TESTS
+ }
+#undef F
+#define F(var) f##var ();
+ TESTS
+ for (i = 0; i < 64; i++)
+ {
+ asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+ if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+ abort ();
+ TESTS
+ }
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c.jj 2011-09-21 16:32:37.228590863 +0200
+++ gcc/testsuite/gcc.target/i386/sse4_1-cond-1.c 2011-09-21 16:32:37.228590863 +0200
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O3 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+extern void abort (void);
+double ad[64], bd[64], cd[64], dd[64], ed[64];
+float af[64], bf[64], cf[64], df[64], ef[64];
+signed char ac[64], bc[64], cc[64], dc[64], ec[64];
+short as[64], bs[64], cs[64], ds[64], es[64];
+int ai[64], bi[64], ci[64], di[64], ei[64];
+long long all[64], bll[64], cll[64], dll[64], ell[64];
+unsigned char auc[64], buc[64], cuc[64], duc[64], euc[64];
+unsigned short aus[64], bus[64], cus[64], dus[64], eus[64];
+unsigned int au[64], bu[64], cu[64], du[64], eu[64];
+unsigned long long aull[64], bull[64], cull[64], dull[64], eull[64];
+
+#define F(var) \
+__attribute__((noinline, noclone)) void \
+f##var (void) \
+{ \
+ int i; \
+ for (i = 0; i < 64; i++) \
+ { \
+ __typeof (a##var[0]) d = d##var[i], e = e##var[i]; \
+ a##var[i] = b##var[i] > c##var[i] ? d : e; \
+ } \
+}
+
+#define TESTS \
+F (d) F (f) F (c) F (s) F (i) F (ll) F (uc) F (us) F (u) F (ull)
+
+TESTS
+
+void
+TEST ()
+{
+ int i;
+ for (i = 0; i < 64; i++)
+ {
+#undef F
+#define F(var) \
+ b##var[i] = i + 64; \
+ switch (i % 3) \
+ { \
+ case 0: c##var[i] = i + 64; break; \
+ case 1: c##var[i] = 127 - i; break; \
+ case 2: c##var[i] = i; break; \
+ } \
+ d##var[i] = i / 2; \
+ e##var[i] = i * 2;
+ TESTS
+ }
+#undef F
+#define F(var) f##var ();
+ TESTS
+ for (i = 0; i < 64; i++)
+ {
+ asm volatile ("" : : : "memory");
+#undef F
+#define F(var) \
+ if (a##var[i] != (b##var[i] > c##var[i] ? d##var[i] : e##var[i])) \
+ abort ();
+ TESTS
+ }
+}
--- gcc/testsuite/gcc.target/i386/avx-cond-1.c.jj 2011-09-21 16:32:37.228590863 +0200
+++ gcc/testsuite/gcc.target/i386/avx-cond-1.c 2011-09-21 16:32:37.229545353 +0200
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#include "sse4_1-cond-1.c"
Jakub
More information about the Gcc-patches
mailing list