[PATCH 24/62] AVX512FP16: Add vmovw/vmovsh.
liuhongt
hongtao.liu@intel.com
Thu Jul 1 06:16:10 GMT 2021
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h (_mm_cvtsi16_si128):
New intrinsic.
(_mm_cvtsi128_si16): Likewise.
(_mm_mask_load_sh): Likewise.
(_mm_maskz_load_sh): Likewise.
(_mm_mask_store_sh): Likewise.
(_mm_move_sh): Likewise.
(_mm_mask_move_sh): Likewise.
(_mm_maskz_move_sh): Likewise.
* config/i386/i386-builtin-types.def: Add corresponding builtin types.
* config/i386/i386-builtin.def: Add corresponding new builtins.
* config/i386/i386-expand.c
(ix86_expand_special_args_builtin): Handle new builtin types.
(ix86_expand_vector_init_one_nonzero): Adjust for FP16 target.
* config/i386/sse.md (VI2F): New mode iterator.
(vec_set<mode>_0): Use new mode iterator.
(avx512f_mov<ssescalarmodelower>_mask): Adjust for HF vector mode.
(avx512f_load<mode>_mask): Ditto.
(*avx512f_load<mode>_mask): Ditto.
(avx512f_store<mode>_mask): Ditto.
---
gcc/config/i386/avx512fp16intrin.h | 59 ++++++++++++++++++++++++++
gcc/config/i386/i386-builtin-types.def | 3 ++
gcc/config/i386/i386-builtin.def | 5 +++
gcc/config/i386/i386-expand.c | 11 +++++
gcc/config/i386/sse.md | 33 +++++++-------
5 files changed, 95 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 2fbfc140c44..cdf6646c8c6 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -2453,6 +2453,65 @@ _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
#endif /* __OPTIMIZE__ */
+/* Intrinsics vmovw. */
+/* Return the vector produced by _mm_set_epi16 with __A as its last
+   argument and zero for the other seven arguments.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi16_si128 (short __A)
+{
+ return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
+}
+
+/* Return the value __builtin_ia32_vec_ext_v8hi yields for index 0 of
+   __A reinterpreted as __v8hi (presumably the lowest 16-bit element;
+   confirm against the builtin's definition).  */
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si16 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
+}
+
+/* Intrinsics vmovsh. */
+/* Masked scalar load wrapper around __builtin_ia32_loadsh_mask.
+   __A is the vector passed as the builtin's second operand, __B the
+   mask and __C the address.  Note that the builtin takes the pointer
+   first, so the arguments are reordered here.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
+{
+ return __builtin_ia32_loadsh_mask (__C, __A, __B);
+}
+
+/* Zero-masking variant of the scalar load: calls
+   __builtin_ia32_loadsh_mask with address __B, mask __A and
+   _mm_setzero_ph () as the vector operand.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
+{
+ return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
+}
+
+/* Masked scalar store wrapper: passes address __A, source vector __C
+   and mask __B to __builtin_ia32_storesh_mask (pointer first, then
+   vector, then mask).
+   NOTE(review): __A is the destination of the store yet is declared
+   pointer-to-const, matching the PCFLOAT16 builtin type used for
+   __builtin_ia32_storesh_mask -- confirm this is intended.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
+{
+ __builtin_ia32_storesh_mask (__A, __C, __B);
+}
+
+/* Return a copy of __A whose element 0 has been replaced by element 0
+   of __B.  __A is a by-value parameter, so the caller's vector is not
+   modified.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sh (__m128h __A, __m128h __B)
+{
+ __A[0] = __B[0];
+ return __A;
+}
+
+/* Masked scalar move wrapper around __builtin_ia32_vmovsh_mask.
+   The builtin receives the two source vectors __C and __D first,
+   followed by __A and the mask __B, so the intrinsic's argument
+   order is rearranged here.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
+}
+
+/* Zero-masking variant of the scalar move: calls
+   __builtin_ia32_vmovsh_mask with sources __B and __C,
+   _mm_setzero_ph () as the third operand and __A as the mask.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
+}
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 79e7edf13e5..6cf3e354c78 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -134,6 +134,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST)
DEF_POINTER_TYPE (PVOID, VOID)
DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PCFLOAT16, FLOAT16, CONST)
DEF_POINTER_TYPE (PSHORT, SHORT)
DEF_POINTER_TYPE (PUSHORT, USHORT)
DEF_POINTER_TYPE (PINT, INT)
@@ -1308,6 +1309,8 @@ DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI)
DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
+DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index ed1a4a38b1c..be617b8f18a 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -393,6 +393,10 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mas
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+/* AVX512FP16 */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_storehf_mask, "__builtin_ia32_storesh_mask", IX86_BUILTIN_STORESH_MASK, UNKNOWN, (int) VOID_FTYPE_PCFLOAT16_V8HF_UQI)
+
/* RDPKRU and WRPKRU. */
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
@@ -2826,6 +2830,7 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getmantv16hf_mask, "__builtin_ia32_getmantph256_mask", IX86_BUILTIN_GETMANTPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getmantv8hf_mask, "__builtin_ia32_getmantph128_mask", IX86_BUILTIN_GETMANTPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_movhf_mask, "__builtin_ia32_vmovsh_mask", IX86_BUILTIN_VMOVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 266aa411ddb..bfc7fc75b97 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -10907,6 +10907,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PFLOAT_V16SF_UHI:
case VOID_FTYPE_PFLOAT_V8SF_UQI:
case VOID_FTYPE_PFLOAT_V4SF_UQI:
+ case VOID_FTYPE_PCFLOAT16_V8HF_UQI:
case VOID_FTYPE_PV32QI_V32HI_USI:
case VOID_FTYPE_PV16QI_V16HI_UHI:
case VOID_FTYPE_PUDI_V8HI_UQI:
@@ -10979,6 +10980,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
+ case V8HF_FTYPE_PCFLOAT16_V8HF_UQI:
nargs = 3;
klass = load;
memory = 0;
@@ -13993,6 +13995,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
break;
case E_V8HImode:
use_vector_set = TARGET_SSE2;
+ gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+ ? gen_vec_setv8hi_0 : NULL;
break;
case E_V8QImode:
use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
@@ -14004,8 +14008,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
use_vector_set = TARGET_SSE4_1;
break;
case E_V32QImode:
+ use_vector_set = TARGET_AVX;
+ break;
case E_V16HImode:
use_vector_set = TARGET_AVX;
+ gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+ ? gen_vec_setv16hi_0 : NULL;
break;
case E_V8SImode:
use_vector_set = TARGET_AVX;
@@ -14053,6 +14061,11 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
use_vector_set = TARGET_AVX512FP16 && one_var == 0;
gen_vec_set_0 = gen_vec_setv32hf_0;
break;
+ case E_V32HImode:
+ /* Only take the vec_set<mode>_0 path when one_var is 0.  */
+ use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+ gen_vec_set_0 = gen_vec_setv32hi_0;
+ break;
default:
break;
}
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c4db778e25d..97f7c698d5d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -758,6 +758,7 @@ (define_mode_iterator VIHF_AVX512BW
(V32HF "TARGET_AVX512FP16")])
;; Int-float size matches
+;; 8-, 16- and 32-element HI and HF vector modes; used by vec_set<mode>_0.
+(define_mode_iterator VI2F [V8HI V16HI V32HI V8HF V16HF V32HF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
@@ -1317,13 +1318,13 @@ (define_insn_and_split "*<avx512>_load<mode>"
[(set (match_dup 0) (match_dup 1))])
(define_insn "avx512f_mov<ssescalarmodelower>_mask"
- [(set (match_operand:VF_128 0 "register_operand" "=v")
- (vec_merge:VF_128
- (vec_merge:VF_128
- (match_operand:VF_128 2 "register_operand" "v")
- (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
+ [(set (match_operand:VFH_128 0 "register_operand" "=v")
+ (vec_merge:VFH_128
+ (vec_merge:VFH_128
+ (match_operand:VFH_128 2 "register_operand" "v")
+ (match_operand:VFH_128 3 "nonimm_or_0_operand" "0C")
(match_operand:QI 4 "register_operand" "Yk"))
- (match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VFH_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
"vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
@@ -1336,7 +1337,7 @@ (define_expand "avx512f_load<mode>_mask"
(vec_merge:<ssevecmode>
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
- (match_operand:MODEF 1 "memory_operand"))
+ (match_operand:MODEFH 1 "memory_operand"))
(match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
(match_operand:QI 3 "register_operand"))
(match_dup 4)
@@ -1349,7 +1350,7 @@ (define_insn "*avx512f_load<mode>_mask"
(vec_merge:<ssevecmode>
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
- (match_operand:MODEF 1 "memory_operand" "m"))
+ (match_operand:MODEFH 1 "memory_operand" "m"))
(match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
(match_operand:QI 3 "register_operand" "Yk"))
(match_operand:<ssevecmode> 4 "const0_operand" "C")
@@ -1362,11 +1363,11 @@ (define_insn "*avx512f_load<mode>_mask"
(set_attr "mode" "<MODE>")])
(define_insn "avx512f_store<mode>_mask"
- [(set (match_operand:MODEF 0 "memory_operand" "=m")
- (if_then_else:MODEF
+ [(set (match_operand:MODEFH 0 "memory_operand" "=m")
+ (if_then_else:MODEFH
(and:QI (match_operand:QI 2 "register_operand" "Yk")
(const_int 1))
- (vec_select:MODEF
+ (vec_select:MODEFH
(match_operand:<ssevecmode> 1 "register_operand" "v")
(parallel [(const_int 0)]))
(match_dup 0)))]
@@ -8513,11 +8514,11 @@ (define_insn "vec_set<mode>_0"
;; vmovw clears also the higer bits
(define_insn "vec_set<mode>_0"
- [(set (match_operand:VF_AVX512FP16 0 "register_operand" "=v")
- (vec_merge:VF_AVX512FP16
- (vec_duplicate:VF_AVX512FP16
- (match_operand:HF 2 "nonimmediate_operand" "rm"))
- (match_operand:VF_AVX512FP16 1 "const0_operand" "C")
+ [(set (match_operand:VI2F 0 "register_operand" "=v")
+ (vec_merge:VI2F
+ (vec_duplicate:VI2F
+ (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "rm"))
+ (match_operand:VI2F 1 "const0_operand" "C")
(const_int 1)))]
"TARGET_AVX512FP16"
"vmovw\t{%2, %x0|%x0, %2}"
--
2.18.1
More information about the Gcc-patches
mailing list