]> gcc.gnu.org Git - gcc.git/commitdiff
Fold blendv builtins into gimple.
authorliuhongt <hongtao.liu@intel.com>
Fri, 21 May 2021 01:48:18 +0000 (09:48 +0800)
committerliuhongt <hongtao.liu@intel.com>
Mon, 28 Jun 2021 01:15:40 +0000 (09:15 +0800)
Fold __builtin_ia32_pblendvb128 (a, b, c) as VEC_COND_EXPR (c < 0, b,
a), similar for float version but with mask operand VIEW_CONVERT_EXPR
to same sized integer vectype.

gcc/ChangeLog:

* config/i386/i386-builtin.def (IX86_BUILTIN_BLENDVPD256,
IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_PBLENDVB256,
IX86_BUILTIN_BLENDVPD, IX86_BUILTIN_BLENDVPS,
IX86_BUILTIN_PBLENDVB128): Replace icode with
CODE_FOR_nothing.
* config/i386/i386.c (ix86_gimple_fold_builtin): Fold blendv
builtins.
* config/i386/sse.md (*<sse4_1_avx2>_pblendvb_lt_subreg_not):
New pre_reload splitter.

gcc/testsuite/ChangeLog:

* gcc.target/i386/funcspec-8.c: Replace
__builtin_ia32_blendvpd with  __builtin_ia32_roundps_az.
* gcc.target/i386/blendv-1.c: New test.
* gcc.target/i386/blendv-2.c: New test.

gcc/config/i386/i386-builtin.def
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/blendv-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/blendv-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/funcspec-8.c

index ea79e0bdda270db93c426bb0ae64746d30a1e520..1cc0cc6968ce7194b2a6e90928369d6ef72c2b98 100644 (file)
@@ -902,13 +902,13 @@ BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_palignrdi,
 /* SSE4.1 */
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
@@ -1028,8 +1028,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpe
 
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
@@ -1154,7 +1154,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
index c71c9e666a4ed97f7a9746508b33343f7b7e4d6b..a93128fa0a4e3d60d3b3d33b0785cf6fbb3b464a 100644 (file)
@@ -17983,6 +17983,43 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
        }
       break;
 
+    case IX86_BUILTIN_PBLENDVB128:
+    case IX86_BUILTIN_PBLENDVB256:
+    case IX86_BUILTIN_BLENDVPS:
+    case IX86_BUILTIN_BLENDVPD:
+    case IX86_BUILTIN_BLENDVPS256:
+    case IX86_BUILTIN_BLENDVPD256:
+      gcc_assert (n_args == 3);
+      arg0 = gimple_call_arg (stmt, 0);
+      arg1 = gimple_call_arg (stmt, 1);
+      arg2 = gimple_call_arg (stmt, 2);
+      if (gimple_call_lhs (stmt))
+       {
+         location_t loc = gimple_location (stmt);
+         tree type = TREE_TYPE (arg2);
+         gimple_seq stmts = NULL;
+         if (VECTOR_FLOAT_TYPE_P (type))
+           {
+             tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
+               ? intSI_type_node : intDI_type_node;
+             type = get_same_sized_vectype (itype, type);
+             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
+           }
+         tree zero_vec = build_zero_cst (type);
+         tree cmp_type = truth_type_for (type);
+         tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
+         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+         gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+                                          VEC_COND_EXPR, cmp,
+                                          arg1, arg0);
+         gimple_set_location (g, loc);
+         gsi_replace (gsi, g, false);
+       }
+      else
+       gsi_replace (gsi, gimple_build_nop (), false);
+      return true;
+
+
     case IX86_BUILTIN_PCMPEQB128:
     case IX86_BUILTIN_PCMPEQW128:
     case IX86_BUILTIN_PCMPEQD128:
index e4f01e64bc161747258749dd896e3e652a391f09..310063544a6184a4933c5f5118361108eda7c6b6 100644 (file)
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
+  [(set (match_operand:VI1_AVX2 0 "register_operand")
+       (unspec:VI1_AVX2
+         [(match_operand:VI1_AVX2 2 "vector_operand")
+          (match_operand:VI1_AVX2 1 "register_operand")
+          (lt:VI1_AVX2
+            (subreg:VI1_AVX2
+              (not (match_operand 3 "register_operand")) 0)
+            (match_operand:VI1_AVX2 4 "const0_operand"))]
+         UNSPEC_BLENDV))]
+  "TARGET_SSE4_1
+   && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+   && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (unspec:VI1_AVX2
+        [(match_dup 1) (match_dup 2)
+         (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
+  "operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
+
 (define_insn "sse4_1_pblendw"
   [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
        (vec_merge:V8HI
diff --git a/gcc/testsuite/gcc.target/i386/blendv-1.c b/gcc/testsuite/gcc.target/i386/blendv-1.c
new file mode 100644 (file)
index 0000000..fcbbfb9
--- /dev/null
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+
+v4sf
+foo (v4sf a, v4sf b, v4sf c)
+{
+  return __builtin_ia32_blendvps (a, b, c);
+}
+
+v8sf
+foo2 (v8sf a, v8sf b, v8sf c)
+{
+  return __builtin_ia32_blendvps256 (a, b, c);
+}
+
+v2df
+foo3 (v2df a, v2df b, v2df c)
+{
+  return __builtin_ia32_blendvpd (a, b, c);
+}
+
+v4df
+foo4 (v4df a, v4df b, v4df c)
+{
+  return __builtin_ia32_blendvpd256 (a, b, c);
+}
+
+v16qi
+foo5 (v16qi a, v16qi b, v16qi c)
+{
+  return __builtin_ia32_pblendvb128 (a, b, c);
+}
+
+v32qi
+foo6 (v32qi a, v32qi b, v32qi c)
+{
+  return __builtin_ia32_pblendvb256 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/blendv-2.c b/gcc/testsuite/gcc.target/i386/blendv-2.c
new file mode 100644 (file)
index 0000000..e61e023
--- /dev/null
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-not {pblendv} } } */
+/* { dg-final { scan-assembler-not {blendvp} } } */
+
+#include <x86intrin.h>
+__m128
+foo (__m128 a, __m128 b)
+{
+  return _mm_blendv_ps (a, b, _mm_setzero_ps ());
+}
+
+__m256
+foo2 (__m256 a, __m256 b)
+{
+  return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0));
+}
+
+__m128d
+foo3 (__m128d a, __m128d b, __m128d c)
+{
+  return _mm_blendv_pd (a, b, _mm_set1_pd (1.0));
+}
+
+__m256d
+foo4 (__m256d a, __m256d b, __m256d c)
+{
+  return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3));
+}
+
+__m128i
+foo5 (__m128i a, __m128i b, __m128i c)
+{
+  return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3));
+}
+
+__m256i
+foo6 (__m256i a, __m256i b, __m256i c)
+{
+  return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22));
+}
index 0a6c709003a02e31f0285fde32b06b579fad1956..f15541169e719705189b806df62005f572099698 100644 (file)
@@ -52,19 +52,19 @@ generic_psignd128 (__m128w a, __m128w b)
 #error "-msse4.1 should not be set for this test"
 #endif
 
-__m128d sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) __attribute__((__target__("sse4.1")));
-__m128d generic_blendvpd (__m128d a, __m128d b, __m128d c);
+__m128 sse4_1_roundv4sf2 (__m128 a) __attribute__((__target__("sse4.1")));
+__m128 generic_roundv4sf2 (__m128 a);
 
-__m128d
-sse4_1_blendvpd  (__m128d a, __m128d b, __m128d c)
+__m128
+sse4_1_roundv4sf2  (__m128 a)
 {
-  return __builtin_ia32_blendvpd (a, b, c);
+  return __builtin_ia32_roundps_az (a);
 }
 
-__m128d
-generic_blendvpd  (__m128d a, __m128d b, __m128d c)
+__m128
+generic_blendvpd  (__m128 a)
 {
-  return __builtin_ia32_blendvpd (a, b, c);            /* { dg-error "needs isa option" } */
+  return __builtin_ia32_roundps_az (a);                /* { dg-error "needs isa option" } */
 }
 
 #ifdef __SSE4_2__
This page took 0.094602 seconds and 5 git commands to generate.