]>
gcc.gnu.org Git - gcc.git/blob - gcc/config/i386/avx512bf16intrin.h
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512BF16INTRIN_H_INCLUDED
29 #define _AVX512BF16INTRIN_H_INCLUDED
31 #ifndef __AVX512BF16__
32 #pragma GCC push_options
33 #pragma GCC target("avx512bf16")
34 #define __DISABLE_AVX512BF16__
35 #endif /* __AVX512BF16__ */
37 /* Convert One BF16 Data to One Single Float Data. */
39 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
40 _mm_cvtsbh_ss (__bf16 __A
)
42 return __builtin_ia32_cvtbf2sf (__A
);
45 #ifdef __DISABLE_AVX512BF16__
46 #undef __DISABLE_AVX512BF16__
47 #pragma GCC pop_options
48 #endif /* __DISABLE_AVX512BF16__ */
50 #if !defined (__AVX512BF16__) || !defined (__EVEX512__)
51 #pragma GCC push_options
52 #pragma GCC target("avx512bf16,evex512")
53 #define __DISABLE_AVX512BF16_512__
54 #endif /* __AVX512BF16_512__ */
56 /* Internal data types for implementing the intrinsics. */
57 typedef __bf16 __v32bf
__attribute__ ((__vector_size__ (64)));
59 /* The Intel API is flexible enough that we must allow aliasing with other
60 vector types, and their scalar components. */
61 typedef __bf16 __m512bh
__attribute__ ((__vector_size__ (64), __may_alias__
));
65 extern __inline __m512bh
66 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
67 _mm512_cvtne2ps_pbh (__m512 __A
, __m512 __B
)
69 return (__m512bh
)__builtin_ia32_cvtne2ps2bf16_v32bf(__A
, __B
);
72 extern __inline __m512bh
73 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
74 _mm512_mask_cvtne2ps_pbh (__m512bh __A
, __mmask32 __B
, __m512 __C
, __m512 __D
)
76 return (__m512bh
)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C
, __D
, __A
, __B
);
79 extern __inline __m512bh
80 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
81 _mm512_maskz_cvtne2ps_pbh (__mmask32 __A
, __m512 __B
, __m512 __C
)
83 return (__m512bh
)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B
, __C
, __A
);
88 extern __inline __m256bh
89 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
90 _mm512_cvtneps_pbh (__m512 __A
)
92 return (__m256bh
)__builtin_ia32_cvtneps2bf16_v16sf(__A
);
95 extern __inline __m256bh
96 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
97 _mm512_mask_cvtneps_pbh (__m256bh __A
, __mmask16 __B
, __m512 __C
)
99 return (__m256bh
)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C
, __A
, __B
);
102 extern __inline __m256bh
103 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
104 _mm512_maskz_cvtneps_pbh (__mmask16 __A
, __m512 __B
)
106 return (__m256bh
)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B
, __A
);
111 extern __inline __m512
112 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
113 _mm512_dpbf16_ps (__m512 __A
, __m512bh __B
, __m512bh __C
)
115 return (__m512
)__builtin_ia32_dpbf16ps_v16sf(__A
, __B
, __C
);
118 extern __inline __m512
119 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
120 _mm512_mask_dpbf16_ps (__m512 __A
, __mmask16 __B
, __m512bh __C
, __m512bh __D
)
122 return (__m512
)__builtin_ia32_dpbf16ps_v16sf_mask(__A
, __C
, __D
, __B
);
125 extern __inline __m512
126 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
127 _mm512_maskz_dpbf16_ps (__mmask16 __A
, __m512 __B
, __m512bh __C
, __m512bh __D
)
129 return (__m512
)__builtin_ia32_dpbf16ps_v16sf_maskz(__B
, __C
, __D
, __A
);
132 extern __inline __m512
133 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
134 _mm512_cvtpbh_ps (__m256bh __A
)
136 return (__m512
)_mm512_castsi512_ps ((__m512i
)_mm512_slli_epi32 (
137 (__m512i
)_mm512_cvtepi16_epi32 ((__m256i
)__A
), 16));
140 extern __inline __m512
141 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
142 _mm512_maskz_cvtpbh_ps (__mmask16 __U
, __m256bh __A
)
144 return (__m512
)_mm512_castsi512_ps ((__m512i
) _mm512_slli_epi32 (
145 (__m512i
)_mm512_maskz_cvtepi16_epi32 (
146 (__mmask16
)__U
, (__m256i
)__A
), 16));
149 extern __inline __m512
150 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
151 _mm512_mask_cvtpbh_ps (__m512 __S
, __mmask16 __U
, __m256bh __A
)
153 return (__m512
)_mm512_castsi512_ps ((__m512i
)(_mm512_mask_slli_epi32 (
154 (__m512i
)__S
, (__mmask16
)__U
,
155 (__m512i
)_mm512_cvtepi16_epi32 ((__m256i
)__A
), 16)));
158 #ifdef __DISABLE_AVX512BF16_512__
159 #undef __DISABLE_AVX512BF16_512__
160 #pragma GCC pop_options
161 #endif /* __DISABLE_AVX512BF16_512__ */
163 #endif /* _AVX512BF16INTRIN_H_INCLUDED */
This page took 0.042113 seconds and 5 git commands to generate.