libstdc++
simd_math.h
1// Math overloads for simd -*- C++ -*-
2
3// Copyright (C) 2020-2024 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27
28#if __cplusplus >= 201703L
29
30#include <utility>
31#include <iomanip>
32
33_GLIBCXX_SIMD_BEGIN_NAMESPACE
// fixed_size_simd of _Tp with the same element count as the simd type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
// Maps the return type of the scalar <cmath> function (probed with double
// arguments) to the return type of the corresponding simd overload:
//   double -> simd<_Tp, _Abi>        (function returns the value type)
//   bool   -> simd_mask<_Tp, _Abi>   (predicate functions)
//   else   -> fixed_size_simd<_DoubleR, N>  (e.g. int for ilogb)
// Note: the primary template is only declared here; its definition follows
// the specializations below.
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

// Catch-all: any other scalar return type maps to a fixed_size_simd of it.
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56
57//}}}
58// _GLIBCXX_SIMD_MATH_CALL_ {{{
// Generates the unary math overload __name(simd<_Tp, _Abi>): the return
// type is derived from the scalar std::__name(double) overload via
// _Math_return_type_t, and the computation is forwarded to the ABI-specific
// implementation _Abi::_SimdImpl::_S_<name>. Only enabled for
// floating-point value types. (Comments cannot be placed inside the macro
// because of the line continuations.)
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(simd<_Tp, _Abi> __x) \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
67
68// }}}
69//_Extra_argument_type{{{
// Maps the extra (second/third) parameter type of a <cmath> function to the
// simd-friendly equivalent. Each specialization provides:
//  - type: the parameter type of the generated simd overload
//  - declval: used in decltype to probe the scalar overload's return type
//  - __needs_temporary_scalar: true for output-pointer parameters
//  - _S_data: unwraps the argument for the ABI-level implementation
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// Output pointer of the value type itself (e.g. the _Tp* of modf).
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// Output pointer of integral type (e.g. the int* of frexp) maps to a
// pointer to fixed_size_simd with matching element count.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// By-value argument of the value type itself (e.g. the second _Tp of atan2).
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// By-value argument of integral type (e.g. the int of ldexp).
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
120
121//}}}
122// _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Generates the binary math overloads:
//  1) __name(simd, _Arg2::type), forwarding to _Abi::_SimdImpl::_S_<name>;
//  2) a converting overload that broadcasts/converts a non-simd first
//     argument to simd<_Tp, _Abi> when the second argument is an exact
//     simd<_Tp, _Abi>. It is only enabled when __arg2 is the value type
//     itself, the first argument is not already that simd type but is
//     convertible to it, and _Tp is floating-point.
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2) \
template < \
  typename _Tp, typename _Abi, typename..., \
  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
  typename _R = _Math_return_type_t< \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
  { \
    return {__private_init, \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  } \
template <typename _Up, typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
    decltype(std::__name( \
	       declval<double>(), \
	       declval<enable_if_t< \
			 conjunction_v< \
			   is_same<__arg2, _Tp>, \
			   negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
			   is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
			 double>>())), \
    _Tp, _Abi> \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
149
150// }}}
151// _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Generates the ternary math overloads (e.g. fma, fmadd-style functions):
//  1) __name(simd, _Arg2::type, _Arg3::type), forwarding to
//     _Abi::_SimdImpl::_S_<name>;
//  2) a converting overload that promotes mixed arguments to a common simd
//     type _Simd (taken from the second or third argument), disabled when
//     the first argument is already a floating-point simd to avoid
//     ambiguity with overload 1.
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
	  typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>, \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>(), _Arg2::declval(), \
				 _Arg3::declval())), \
	    _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
	 const typename _Arg3::type& __z) \
  { \
    return {__private_init, \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
					 _Arg3::_S_data(__z))}; \
  } \
template < \
  typename _T0, typename _T1, typename _T2, typename..., \
  typename _U0 = __remove_cvref_t<_T0>, \
  typename _U1 = __remove_cvref_t<_T1>, \
  typename _U2 = __remove_cvref_t<_T2>, \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
  typename = enable_if_t<conjunction_v< \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
    negation<conjunction< \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
					  declval<const _Simd&>(), \
					  declval<const _Simd&>())) \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
  { \
    return __name(_Simd(static_cast<_T0&&>(__xx)), \
		  _Simd(static_cast<_T1&&>(__yy)), \
		  _Simd(static_cast<_T2&&>(__zz))); \
  }
189
190// }}}
191// __cosSeries {{{
192template <typename _Abi>
193 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
194 __cosSeries(const simd<float, _Abi>& __x)
195 {
196 const simd<float, _Abi> __x2 = __x * __x;
197 simd<float, _Abi> __y;
198 __y = 0x1.ap-16f; // 1/8!
199 __y = __y * __x2 - 0x1.6c1p-10f; // -1/6!
200 __y = __y * __x2 + 0x1.555556p-5f; // 1/4!
201 return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
202 }
203
204template <typename _Abi>
205 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
206 __cosSeries(const simd<double, _Abi>& __x)
207 {
208 const simd<double, _Abi> __x2 = __x * __x;
209 simd<double, _Abi> __y;
210 __y = 0x1.AC00000000000p-45; // 1/16!
211 __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
212 __y = __y * __x2 + 0x1.1EED8C0000000p-29; // 1/12!
213 __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
214 __y = __y * __x2 + 0x1.A01A01A018000p-16; // 1/8!
215 __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
216 __y = __y * __x2 + 0x1.5555555555554p-5; // 1/4!
217 return (__y * __x2 - .5f) * __x2 + 1.f;
218 }
219
220// }}}
221// __sinSeries {{{
222template <typename _Abi>
223 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
224 __sinSeries(const simd<float, _Abi>& __x)
225 {
226 const simd<float, _Abi> __x2 = __x * __x;
227 simd<float, _Abi> __y;
228 __y = -0x1.9CC000p-13f; // -1/7!
229 __y = __y * __x2 + 0x1.111100p-7f; // 1/5!
230 __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
231 return __y * (__x2 * __x) + __x;
232 }
233
234template <typename _Abi>
235 _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
236 __sinSeries(const simd<double, _Abi>& __x)
237 {
238 // __x = [0, 0.7854 = pi/4]
239 // __x² = [0, 0.6169 = pi²/8]
240 const simd<double, _Abi> __x2 = __x * __x;
241 simd<double, _Abi> __y;
242 __y = -0x1.ACF0000000000p-41; // -1/15!
243 __y = __y * __x2 + 0x1.6124400000000p-33; // 1/13!
244 __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
245 __y = __y * __x2 + 0x1.71DE3A5540000p-19; // 1/9!
246 __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
247 __y = __y * __x2 + 0x1.1111111111110p-7; // 1/5!
248 __y = __y * __x2 - 0x1.5555555555555p-3; // -1/3!
249 return __y * (__x2 * __x) + __x;
250 }
251
252// }}}
253// __zero_low_bits {{{
254template <int _Bits, typename _Tp, typename _Abi>
255 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
256 __zero_low_bits(simd<_Tp, _Abi> __x)
257 {
258 const simd<_Tp, _Abi> __bitmask
259 = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
260 return {__private_init,
261 _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
262 }
263
264// }}}
265// __fold_input {{{
266
267/**@internal
268 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
269 * quadrant 0: [-¼π, ¼π]
270 * quadrant 1: [ ¼π, ¾π]
271 * quadrant 2: [ ¾π, 1¼π]
272 * quadrant 3: [1¼π, 1¾π]
273 *
274 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
275 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
276 * ```
277 * y = trunc(x / ¼π);
278 * y += fmod(y, 2);
279 * ```
280 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
282 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
283 * `2/π * x` is better (faster).
284 */
// Result of __fold_input: the folded absolute value together with the
// quadrant it came from (see the algorithm description above).
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    // |input| folded into [-¼π, ¼π]
    simd<_Tp, _Abi> _M_x;
    // quadrant index in [0, 3] for each element
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };
291
// Constants for single-precision argument folding. π/2 is represented as
// __pi_2_5bits0 + __pi_2_5bits0_rem so that products with small integers
// stay exact (the first part has 5 trailing zero bits).
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
// Double-precision counterparts used for folding large inputs.
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2
} // namespace __math_double
305
// Folds |__x| into [-π/4, π/4] and computes the quadrant (see _Folded).
// Single precision; three tiers depending on the input magnitude.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    // Tier 1: everything already inside [-π/4, π/4] — nothing to fold.
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    // Tier 2: moderate inputs — fold in single precision using the split
    // π/2 representation (hi part with trailing zero bits + remainder).
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
	const _V __y = nearbyint(__r._M_x * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
	__r._M_x -= __y * __pi_2_5bits0;
	__r._M_x -= __y * __pi_2_5bits0_rem;
      }
    // Tier 3: large inputs — fold in double precision to avoid
    // catastrophic cancellation, then convert back.
    else
      {
	using __math_double::__2_over_pi;
	using __math_double::__pi_2;
	using _VD = rebind_simd_t<double, _V>;
	_VD __xd = static_simd_cast<_VD>(__r._M_x);
	_VD __y = nearbyint(__xd * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
349
// Folds |__x| into [-π/4, π/4] and computes the quadrant (see _Folded).
// Double precision: π/2 is split into more parts the larger __y may be, so
// that each partial product __y * part stays exact.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    // Fast path: everything already inside [-π/4, π/4].
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
	__r._M_quadrant = 0;
	return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
	// x - y * pi/2, y uses no more than 11 mantissa bits
	__r._M_x -= __y * 0x1.921FB54443000p0;
	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
	// x - y * pi/2, y uses no more than 29 mantissa bits
	__r._M_x -= __y * 0x1.921FB40000000p0;
	__r._M_x -= __y * 0x1.4442D00000000p-24;
	__r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
	// x - y * pi/2, y may require all mantissa bits
	const _V __y_hi = __zero_low_bits<26>(__y);
	const _V __y_lo = __y - __y_hi;
	const auto __pi_2_1 = 0x1.921FB50000000p0;
	const auto __pi_2_2 = 0x1.110B460000000p-26;
	const auto __pi_2_3 = 0x1.1A62630000000p-54;
	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
	// The max/min pairs order the partial products by magnitude per
	// element before subtracting, reducing rounding error.
	__r._M_x = __r._M_x - __y_hi * __pi_2_1
		     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
		     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
401
402// }}}
403// __extract_exponent_as_int {{{
404template <typename _Tp, typename _Abi>
405 _GLIBCXX_SIMD_INTRINSIC
406 rebind_simd_t<int, simd<_Tp, _Abi>>
407 __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
408 {
409 using _Vp = simd<_Tp, _Abi>;
410 using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
411 using namespace std::experimental::__float_bitwise_operators;
412 using namespace std::experimental::__proposed;
413 const _Vp __exponent_mask
414 = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
415 return static_simd_cast<rebind_simd_t<int, _Vp>>(
416 simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
417 >> (__digits_v<_Tp> - 1));
418 }
419
420// }}}
421// __impl_or_fallback {{{
// Overload-rank dispatch: __impl_or_fallback(impl, fallback, args...) calls
// impl(args...) if that expression is well-formed, otherwise
// fallback(args...). The int/float dummy parameter makes the impl overload
// the better match (int() -> int needs no conversion) so SFINAE on its
// trailing return type decides which one is chosen.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
			      _Args&&... __args)
  -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

// Fallback overload, chosen only when the impl expression is ill-formed
// (int -> float conversion ranks worse).
template <typename ImplFun, typename FallbackFun, typename... _Args,
	  typename = __detail::__odr_helper>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
			      _Args&&... __args)
  -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

// Entry point: forwards to the dispatcher with the int tag.
template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
443//}}}
444
445// trigonometric functions {{{
// acos, asin and atan forward to the ABI implementation via the unary
// macro; atan2 additionally takes a second argument of the value type.
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
450
451/*
452 * algorithm for sine and cosine:
453 *
454 * The result can be calculated with sine or cosine depending on the π/4 section
 * the input is in. sine ≈ __x + __x³; cosine ≈ 1 - __x²
456 *
457 * sine:
458 * Map -__x to __x and invert the output
459 * Extend precision of __x - n * π/4 by calculating
460 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
461 *
462 * Calculate Taylor series with tuned coefficients.
463 * Fix sign.
464 */
465// cos{{{
// Vectorized cosine. Scalar and fixed-size ABIs forward to the ABI
// implementation; otherwise the input is folded into [-π/4, π/4] and a
// sine or cosine series is evaluated per quadrant (see the algorithm
// description above).
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
	// Large float inputs lose too much precision in the folding step;
	// compute those in double precision instead.
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
	    return static_simd_cast<_V>(
		     cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | cosSeries, +
	//        1 | sinSeries, -
	//        2 | cosSeries, -
	//        3 | sinSeries, +
	using namespace std::experimental::__float_bitwise_operators;
	// (1 + quadrant) << 30 is a negative int exactly for quadrants 1
	// and 2; after conversion its sign bit is extracted with -0. and
	// later XORed into the series result.
	const _V __sign_flip
	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

	// Even quadrants use the cosine series, odd ones the sine series.
	const auto __need_cos = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else // some_of(__need_cos)
	  {
	    // Mixed quadrants: evaluate both series and blend per element.
	    _V __r = __sinSeries(__f._M_x);
	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
	    return __r ^ __sign_flip;
	  }
      }
  }
503
504template <typename _Tp>
505 _GLIBCXX_SIMD_ALWAYS_INLINE
506 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
507 cos(simd<_Tp, simd_abi::scalar> __x)
508 { return std::cos(__data(__x)); }
509
510//}}}
511// sin{{{
// Vectorized sine. Scalar and fixed-size ABIs forward to the ABI
// implementation; otherwise the input is folded into [-π/4, π/4] and a
// sine or cosine series is evaluated per quadrant (see the algorithm
// description above).
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
	// Large float inputs lose too much precision in the folding step;
	// compute those in double precision instead.
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
	    return static_simd_cast<_V>(
		     sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | sinSeries
	//        1 | cosSeries
	//        2 | sinSeries, sign flip
	//        3 | cosSeries, sign flip
	using namespace std::experimental::__float_bitwise_operators;
	// 1 - quadrant is negative (sign bit set) for quadrants 2 and 3;
	// XORing with the sign of __x (sine is odd) yields the result sign.
	const auto __sign_flip
	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

	// Even quadrants use the sine series, odd ones the cosine series.
	const auto __need_sin = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else // some_of(__need_sin)
	  {
	    // Mixed quadrants: evaluate both series and blend per element.
	    _V __r = __cosSeries(__f._M_x);
	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
	    return __sign_flip ^ __r;
	  }
      }
  }
549
550template <typename _Tp>
551 _GLIBCXX_SIMD_ALWAYS_INLINE
552 enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
553 sin(simd<_Tp, simd_abi::scalar> __x)
554 { return std::sin(__data(__x)); }
555
556//}}}
// Remaining trigonometric and hyperbolic functions as well as the
// exponential family, all forwarded via the unary macro.
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)
569
570// }}}
571// frexp {{{
572#if _GLIBCXX_SIMD_X86INTRIN
// Per-element vector getexp (AVX512 vgetexpps/pd): dispatches on vector
// width and available ISA features. Without AVX512VL, 128/256-bit inputs
// are widened to 512 bits and the result is narrowed again.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }
601
// Per-element vector getmant (AVX512 vgetmantps/pd) with
// _MM_MANT_NORM_p5_1 (mantissa normalized into [0.5, 1)) and
// _MM_MANT_SIGN_src (source sign preserved). Same ISA dispatch scheme as
// __getexp above.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
					   _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
					      _MM_MANT_NORM_p5_1,
					      _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
636#endif // _GLIBCXX_SIMD_X86INTRIN
637
638/**
639 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
640 *
641 * The return value will be in the range [0.5, 1.0[
642 * The @p __e value will be an integer defining the power-of-two exponent
643 */
// Vectorized frexp: per-element mantissa in +/-[0.5, 1) is returned, the
// power-of-two exponent is stored to *__exp. Implementation strategies, in
// order: scalar, per-element loop (SVE), fixed-size forward, AVX512
// getexp/getmant, and a generic bit-manipulation fallback.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
	// Single element: defer to scalar std::frexp.
	int __tmp;
	const auto __r = std::frexp(__x[0], &__tmp);
	(*__exp)[0] = __tmp;
	return __r;
      }
    else if constexpr (__is_sve_abi<_Abi>())
      {
	// SVE: apply scalar std::frexp element by element.
	simd<_Tp, _Abi> __r;
	__execute_n_times<simd_size_v<_Tp, _Abi>>(
	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	    int __tmp;
	    const auto __ri = std::frexp(__x[__i], &__tmp);
	    (*__exp)[__i] = __tmp;
	    __r[__i] = __ri;
	  });
	return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	// getexp/getmant need at least a 4-element (128-bit) vector.
	constexpr size_t _NI = _Np < 4 ? 4 : _Np;
	const auto __v = __data(__x);
	const auto __isnonzero
	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
	// frexp's exponent is getexp's result + 1 (mantissa in [.5,1) vs [1,2)).
	const _SimdWrapper<int, _NI> __exp_plus1
	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
	// Zero (and non-finite) elements report exponent 0.
	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
				      _SimdWrapper<int, _NI>(), __exp_plus1));
	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
	// Zero (and non-finite) elements return the input unchanged.
	return {__private_init,
		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
					      __isnonzero),
					    __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
	// fallback implementation
	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
	using _V = simd<_Tp, _Abi>;
	using _IV = rebind_simd_t<int, _V>;
	using namespace std::experimental::__proposed;
	using namespace std::experimental::__float_bitwise_operators;

	// Exponent bias of the [.5,1) mantissa form and the extra offset /
	// scale used to bring subnormals into the normal range first.
	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

	// Set all exponent bits, then clear the topmost one: forces the
	// exponent field to represent [.5, 1) while keeping sign/mantissa.
	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
	const _IV __exponent_bits = __extract_exponent_as_int(__x);
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
	  {
	    // Common case: all inputs normal, no corner-case patching.
	    *__exp
	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
	    return __mant;
	  }

#if __FINITE_MATH_ONLY__
	// at least one element of __x is 0 or subnormal, the rest is normal
	// (inf and NaN are excluded by -ffinite-math-only)
	const auto __iszero_inf_nan = __x == 0;
#else
	// Classify 0, inf and NaN via the integer representation of |x|.
	using _Ip = __int_for_sizeof_t<_Tp>;
	const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
	const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
				       __as_int == 0 || __as_int >= __inf);
#endif

	// Scale subnormals into the normal range and redo the extraction.
	const _V __scaled_subnormal = __x * __subnorm_scale;
	const _V __mant_subnormal
	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
	where(!isnormal(__x), __mant) = __mant_subnormal;
	// 0, inf and NaN return the input value unchanged.
	where(__iszero_inf_nan, __mant) = __x;
	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
	using _MaskType =
	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
				 _V, _IV>::mask_type;
	const _MaskType __value_isnormal = isnormal(__x).__cvt();
	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
	// Bias to subtract: __exp_adjust for normals; additionally
	// __exp_offset for subnormals (nonzero with zero exponent field).
	const _IV __offset
	  = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
	    | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
				  & static_simd_cast<_MaskType>(__x != 0))
	       & _IV(__exp_adjust + __exp_offset));
	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
	return __mant;
      }
  }
748
749// }}}
// ldexp takes a fixed_size_simd<int> second argument; ilogb's return type
// is fixed_size_simd<int> (derived from the scalar overload returning int).
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
758
759//}}}
760// logb{{{
// Vectorized logb: the per-element unbiased exponent returned as a value
// of the same floating-point type. Uses AVX512 getexp where available,
// otherwise extracts the exponent bits manually and patches the corner
// cases (zero, NaN, inf, subnormals).
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__proposed;
	auto __is_normal = isnormal(__x);

	// work on abs(__x) to reflect the return value on Linux for negative
	// inputs (domain-error => implementation-defined value is returned)
	const _V abs_x = abs(__x);

	// __exponent(__x) returns the exponent value (bias removed) as
	// simd<_Up> with integral _Up
	auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	  using namespace std::experimental::__proposed;
	  using _IV = rebind_simd_t<
	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
	  return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
		   - (__max_exponent_v<_Tp> - 1);
	};
	_V __r = static_simd_cast<_V>(__exponent(abs_x));
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
	  // without corner cases (nan, inf, subnormal, zero) we have our
	  // answer:
	  return __r;
	// Patch the corner cases: logb(0) = -inf, logb(NaN) = NaN,
	// logb(±inf) = +inf.
	const auto __is_zero = __x == 0;
	const auto __is_nan = isnan(__x);
	const auto __is_inf = isinf(__x);
	where(__is_zero, __r) = -__infinity_v<_Tp>;
	where(__is_nan, __r) = __x;
	where(__is_inf, __r) = __infinity_v<_Tp>;
	__is_normal |= __is_zero || __is_nan || __is_inf;
	if (all_of(__is_normal))
	  // at this point everything but subnormals is handled
	  return __r;
	// subnormals repeat the exponent extraction after multiplication of
	// the input with a floating point value that has 112 (0x70) in its
	// exponent (not too big for sp and large enough for dp)
	const _V __scaled = abs_x * _Tp(0x1p112);
	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
	where(__is_normal, __scaled_exp) = __r;
	return __scaled_exp;
      }
  }
834
835//}}}
836template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
837 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
838 modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
839 {
840 if constexpr (simd_size_v<_Tp, _Abi> == 1)
841 {
842 _Tp __tmp;
843 _Tp __r = std::modf(__x[0], &__tmp);
844 __iptr[0] = __tmp;
845 return __r;
846 }
847 else
848 {
849 const auto __integral = trunc(__x);
850 *__iptr = __integral;
851 auto __r = __x - __integral;
852#if !__FINITE_MATH_ONLY__
853 where(isinf(__x), __r) = _Tp();
854#endif
855 return copysign(__r, __x);
856 }
857 }
858
// Element-wise overloads generated by the _GLIBCXX_SIMD_MATH_CALL_ macro
// family (defined earlier in this header, #undef'ed at the end).
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
866
// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
// Element-wise absolute value for signed integral simd; delegates to the
// ABI-specific implementation.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
874
// _GLIBCXX_SIMD_CVTING2(_NAME) adds two overloads of _NAME in which one of
// the two parameters is wrapped in __type_identity_t and therefore does not
// participate in template argument deduction.  _Tp and _Abi are deduced from
// the other parameter alone, so the non-deduced argument can be implicitly
// converted to the same simd type.  The body's call to _NAME then resolves to
// the overload deducing from both (now identically typed) arguments, hence
// the forwarding does not recurse.
#define _GLIBCXX_SIMD_CVTING2(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }
889
// _GLIBCXX_SIMD_CVTING3(_NAME) is the ternary analogue of
// _GLIBCXX_SIMD_CVTING2: it adds the six overloads in which one or two of the
// three parameters are non-deduced (__type_identity_t), so mixed
// simd/convertible argument lists still deduce _Tp and _Abi from the
// remaining simd argument(s) and convert the rest.  Each body re-dispatches
// to the fully deduced three-simd overload of _NAME.
#define _GLIBCXX_SIMD_CVTING3(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }
938
// Applies __apply chunk-wise to fixed_size simd arguments and recombines the
// per-chunk results into an _R (a fixed_size simd).  Each chunk is rewrapped
// as the native simd type of that chunk before invoking __apply.
// NOTE(review): relies on the _SimdTuple member _M_apply_per_chunk (defined
// elsewhere in the implementation) to iterate the chunks — verify semantics
// against simd_fixed_size.h.
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
		     const _Tps&... __args)
  {
    return {__private_init,
	    __data(__arg0)._M_apply_per_chunk(
	      [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		// __impl identifies the chunk's implementation; its simd_type
		// is the native simd to hand to __apply.
		using _V = typename decltype(__impl)::simd_type;
		return __data(__apply(_V(__private_init, __inner)...));
	      },
	      __data(__args)...)};
  }
952
// Two-argument hypot: sqrt(x² + y²) computed without undue overflow/underflow
// by scaling with a power of 2 derived from the larger input's exponent,
// including the IEC 60559 / Annex F special-value fixups.
template <typename _VV, typename = __detail::__odr_helper>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      // single-element simd: defer to the scalar library function
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
	// fixed_size: re-dispatch chunk-wise to the native-ABI overloads
	return __fixed_size_apply<_V>([](auto __a,
					 auto __b) { return hypot(__a, __b); },
				      __x, __y);
      }
    else
      {
	// A simple solution for _Tp == float would be to cast to double and
	// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
	// dp. It still needs the Annex F fixups though and isn't faster on
	// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
	// AVX-512).
	using namespace __float_bitwise_operators;
	using namespace __proposed;
	_V __absx = abs(__x);          // no error
	_V __absy = abs(__y);          // no error
	_V __hi = max(__absx, __absy); // no error
	_V __lo = min(__absy, __absx); // no error

	// round __hi down to the next power-of-2:
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	if constexpr (__have_neon && !__have_neon_a32)
	  { // With ARMv7 NEON, we have no subnormals and must use slightly
	    // different strategy
	    const _V __hi_exp = __hi & __inf;
	    _V __scale_back = __hi_exp;
	    // For large exponents (max & max/2) the inversion comes too close
	    // to subnormals. Subtract 3 from the exponent:
	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
	    // Invert and adjust for the off-by-one error of inversion via xor:
	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
	    const _V __h1 = __hi * __scale;
	    const _V __l1 = __lo * __scale;
	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
	    // Fix up hypot(0, 0) to not be NaN:
	    where(__hi == 0, __r) = 0;
	    return __r;
	  }
#endif

#ifdef __FAST_MATH__
	// With fast-math, ignore precision of subnormals and inputs from
	// __finite_max_v/2 to __finite_max_v. This removes all
	// branching/masking.
	if constexpr (true)
#else
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
				      && all_of(isnormal(__y))))
#endif
	  {
	    const _V __hi_exp = __hi & __inf;
	    //((__hi + __hi) & __inf) ^ __inf almost works for computing
	    //__scale,
	    // except when (__hi + __hi) & __inf == __inf, in which case __scale
	    // becomes 0 (should be min/2 instead) and thus loses the
	    // information from __lo.
#ifdef __FAST_MATH__
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    using _IV = rebind_simd_t<_Ip, _V>;
	    const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
	    const _V __scale
	      = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
	    // norm_min - denorm_min has all mantissa bits set (and a zero
	    // exponent field), i.e. it is the mantissa mask
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
	    const _V __h1 = (__hi & __mant_mask) | _V(1);
	    const _V __l1 = __lo * __scale;
	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
	  }
	else
	  {
	    // slower path to support subnormals
	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
	    // (which yields NaN) by using min()
	    _V __scale = _V(1 / __norm_min_v<_Tp>);
	    // invert exponent w/o error and w/o using the slow divider unit:
	    // xor inverts the exponent but off by 1. Multiplication with .5
	    // adjusts for the discrepancy.
	    where(__hi >= __norm_min_v<_Tp>, __scale)
	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
	    // adjust final exponent for subnormal inputs
	    _V __hi_exp = __norm_min_v<_Tp>;
	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
	      = __hi & __inf; // no error
	    _V __h1 = __hi * __scale; // no error
	    _V __l1 = __lo * __scale; // no error

	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
	    // this ensures no overflow in the argument to sqrt
	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    // the naive fixup goes like this:
	    //
	    // where(__l1 == 0, __r) = __hi;
	    // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
	    //
	    // The fixup can be prepared in parallel with the sqrt, requiring a
	    // single blend step after hi_exp * sqrt, reducing latency and
	    // throughput:
	    _V __fixup = __hi; // __lo == 0
	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
	    where(!(__lo == 0 || isunordered(__x, __y)
		      || (isinf(__absx) || isinf(__absy))),
		  __fixup)
	      = __r;
	    __r = __fixup;
#endif
	    return __r;
	  }
      }
  }
1080
1081template <typename _Tp, typename _Abi>
1082 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1083 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1084 {
1085 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1086 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1087 __y);
1088 }
1089
// converting overloads for one simd and one implicitly-convertible argument
_GLIBCXX_SIMD_CVTING2(hypot)
1091
 // Three-argument hypot: sqrt(x² + y² + z²) without undue overflow/underflow,
 // using the same exponent-scaling strategy as the two-argument __hypot, plus
 // a dedicated path for x87 fp80 where the bit-tricks do not apply.
 template <typename _VV, typename = __detail::__odr_helper>
   __remove_cvref_t<_VV>
   __hypot(_VV __x, _VV __y, _VV __z)
   {
     using _V = __remove_cvref_t<_VV>;
     using _Abi = typename _V::abi_type;
     using _Tp = typename _V::value_type;
     /* FIXME: enable after PR77776 is resolved
     if constexpr (_V::size() == 1)
       return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
     else
     */
     if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
       {
	 // fixed_size: re-dispatch chunk-wise to the native-ABI overloads
	 return __fixed_size_apply<simd<_Tp, _Abi>>(
	   [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	     return hypot(__a, __b, __c);
	   }, __x, __y, __z);
       }
     else
       {
	 using namespace __float_bitwise_operators;
	 using namespace __proposed;
	 const _V __absx = abs(__x);                 // no error
	 const _V __absy = abs(__y);                 // no error
	 const _V __absz = abs(__z);                 // no error
	 _V __hi = max(max(__absx, __absy), __absz); // no error
	 _V __l0 = min(__absz, max(__absx, __absy)); // no error
	 _V __l1 = min(__absy, __absx);              // no error
	 if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
			 && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
	   { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
	     // NaN. In this case the bit-tricks don't work, they require IEC559
	     // binary32 or binary64 format.
#ifdef __STDC_IEC_559__
	     // fixup for Annex F requirements
	     if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
	       return __infinity_v<_Tp>;
	     else if (isunordered(__absx[0], __absy[0] + __absz[0]))
	       return __quiet_NaN_v<_Tp>;
	     else if (__l0[0] == 0 && __l1[0] == 0)
	       return __hi;
#endif
	     _V __hi_exp = __hi;
	     const _ULLong __tmp = 0x8000'0000'0000'0000ull;
	     // overwrite the 64-bit fp80 significand with only the explicit
	     // integer bit set, keeping sign and exponent: __hi_exp becomes
	     // the power of 2 <= __hi
	     __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
	     const _V __scale = 1 / __hi_exp;
	     __hi *= __scale;
	     __l0 *= __scale;
	     __l1 *= __scale;
	     return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
	   }
	 else
	   {
	     // round __hi down to the next power-of-2:
	     _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	     if constexpr (_V::size() > 1
			     && __is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32)
	       { // With ARMv7 NEON, we have no subnormals and must use slightly
		 // different strategy
		 const _V __hi_exp = __hi & __inf;
		 _V __scale_back = __hi_exp;
		 // For large exponents (max & max/2) the inversion comes too
		 // close to subnormals. Subtract 3 from the exponent:
		 where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
		 // Invert and adjust for the off-by-one error of inversion via
		 // xor:
		 const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
		 const _V __h1 = __hi * __scale;
		 __l0 *= __scale;
		 __l1 *= __scale;
		 _V __lo = __l0 * __l0
			     + __l1 * __l1; // add the two smaller values first
		 asm("" : "+m"(__lo));
		 _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
		 // Fix up hypot(0, 0, 0) to not be NaN:
		 where(__hi == 0, __r) = 0;
		 return __r;
	       }
#endif

#ifdef __FAST_MATH__
	     // With fast-math, ignore precision of subnormals and inputs from
	     // __finite_max_v/2 to __finite_max_v. This removes all
	     // branching/masking.
	     if constexpr (true)
#else
	     if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
					   && all_of(isnormal(__y))
					   && all_of(isnormal(__z))))
#endif
	       {
		 const _V __hi_exp = __hi & __inf;
		 //((__hi + __hi) & __inf) ^ __inf almost works for computing
		 //__scale, except when (__hi + __hi) & __inf == __inf, in which
		 // case __scale
		 // becomes 0 (should be min/2 instead) and thus loses the
		 // information from __lo.
#ifdef __FAST_MATH__
		 using _Ip = __int_for_sizeof_t<_Tp>;
		 using _IV = rebind_simd_t<_Ip, _V>;
		 const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
		 const _V __scale
		   = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
		 const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
		 // norm_min - denorm_min has all mantissa bits set, i.e. it is
		 // the mantissa mask
		 constexpr _Tp __mant_mask
		   = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
		 const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
		 __l0 *= __scale;
		 __l1 *= __scale;
		 const _V __lo
		   = __l0 * __l0
		       + __l1 * __l1; // add the two smaller values first
		 return __hi_exp * sqrt(__lo + __h1 * __h1);
	       }
	     else
	       {
		 // slower path to support subnormals
		 // if __hi is subnormal, avoid scaling by inf & final mul by 0
		 // (which yields NaN) by using min()
		 _V __scale = _V(1 / __norm_min_v<_Tp>);
		 // invert exponent w/o error and w/o using the slow divider
		 // unit: xor inverts the exponent but off by 1. Multiplication
		 // with .5 adjusts for the discrepancy.
		 where(__hi >= __norm_min_v<_Tp>, __scale)
		   = ((__hi & __inf) ^ __inf) * _Tp(.5);
		 // adjust final exponent for subnormal inputs
		 _V __hi_exp = __norm_min_v<_Tp>;
		 where(__hi >= __norm_min_v<_Tp>, __hi_exp)
		   = __hi & __inf; // no error
		 _V __h1 = __hi * __scale; // no error
		 __l0 *= __scale; // no error
		 __l1 *= __scale; // no error
		 _V __lo = __l0 * __l0
			     + __l1 * __l1; // add the two smaller values first
		 _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
		 // fixup for Annex F requirements
		 _V __fixup = __hi; // __lo == 0
		 // where(__lo == 0, __fixup) = __hi;
		 where(isunordered(__x, __y + __z), __fixup)
		   = __quiet_NaN_v<_Tp>;
		 where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
		   = __inf;
		 // Instead of __lo == 0, the following could depend on __h1² ==
		 // __h1² + __lo (i.e. __hi is so much larger than the other two
		 // inputs that the result is exactly __hi). While this may
		 // improve precision, it is likely to reduce efficiency if the
		 // ISA has FMAs (because __h1² + __lo is an FMA, but the
		 // intermediate
		 // __h1² must be kept)
		 where(!(__lo == 0 || isunordered(__x, __y + __z)
			   || isinf(__absx) || isinf(__absy) || isinf(__absz)),
		       __fixup)
		   = __r;
		 __r = __fixup;
#endif
		 return __r;
	       }
	   }
       }
   }
1258
1259 template <typename _Tp, typename _Abi>
1260 _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1261 hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1262 const simd<_Tp, _Abi>& __z)
1263 {
1264 return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1265 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1266 __y,
1267 __z);
1268 }
1269
// converting overloads for mixed simd/convertible argument lists
_GLIBCXX_SIMD_CVTING3(hypot)

_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

// roots, error and gamma functions
_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
// nearest-integer operations
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

// remainder operations
_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1295
// Element-wise copysign: the magnitude of __x combined with the sign of __y.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      // single-element simd: defer to the scalar library function
      return std::copysign(__x[0], __y[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__float_bitwise_operators;
	// 1 and -1 differ in exactly the sign bit, so XOR yields a mask with
	// only the sign bit set
	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
	return (__x & ~__signmask) | (__y & __signmask);
      }
  }
1312
// floating-point manipulation and min/max operations
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1323
// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
// Element-wise infinity test; returns a mask (per _Math_return_type_t<bool>).
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1332
// Element-wise NaN test; returns a mask (per _Math_return_type_t<bool>).
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1341
1342template <typename..., typename _Tp, typename _Abi>
1343 _GLIBCXX_SIMD_ALWAYS_INLINE
1344 simd_mask<_Tp, _Abi>
1345 signbit(simd<_Tp, _Abi> __x)
1346 {
1347 if constexpr (is_integral_v<_Tp>)
1348 {
1349 if constexpr (is_unsigned_v<_Tp>)
1350 return simd_mask<_Tp, _Abi>{}; // false
1351 else
1352 return __x < 0;
1353 }
1354 else
1355 return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1356 }
1357
// quiet (non-signaling) element-wise comparisons
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1364
1365/* not covered in [parallel.simd.math]
1366template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1367template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1368template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1369
1370template <typename _V> struct simd_div_t {
1371 _V quot, rem;
1372};
1373
1374template <typename _Abi>
1375simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1376 _SCharv<_Abi> denom);
1377template <typename _Abi>
1378simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1379 __shortv<_Abi> denom);
1380template <typename _Abi>
1381simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1382template <typename _Abi>
1383simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1384 __longv<_Abi> denom);
1385template <typename _Abi>
1386simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1387 __llongv<_Abi> denom);
1388*/
1389
1390// special math {{{
1391template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1392 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1393 assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1394 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1395 const simd<_Tp, _Abi>& __x)
1396 {
1397 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1398 return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1399 });
1400 }
1401
1402template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1403 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1404 assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1405 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1406 const simd<_Tp, _Abi>& __x)
1407 {
1408 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1409 return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1410 });
1411 }
1412
// element-wise special math functions generated from the scalar <cmath> ones
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1425
1426template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1427 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1428 hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1429 const simd<_Tp, _Abi>& __x)
1430 {
1431 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1432 return std::hermite(__n[__i], __x[__i]);
1433 });
1434 }
1435
1436template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1437 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1438 laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1439 const simd<_Tp, _Abi>& __x)
1440 {
1441 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1442 return std::laguerre(__n[__i], __x[__i]);
1443 });
1444 }
1445
1446template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1447 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1448 legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1449 const simd<_Tp, _Abi>& __x)
1450 {
1451 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1452 return std::legendre(__n[__i], __x[__i]);
1453 });
1454 }
1455
// element-wise Riemann zeta function
_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1457
1458template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1459 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1460 sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1461 const simd<_Tp, _Abi>& __x)
1462 {
1463 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1464 return std::sph_bessel(__n[__i], __x[__i]);
1465 });
1466 }
1467
1468template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1469 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1470 sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1471 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1472 const simd<_Tp, _Abi>& theta)
1473 {
1474 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1475 return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1476 });
1477 }
1478
1479template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1480 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1481 sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1482 const simd<_Tp, _Abi>& __x)
1483 {
1484 return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1485 return std::sph_neumann(__n[__i], __x[__i]);
1486 });
1487 }
1488// }}}
1489
1490#undef _GLIBCXX_SIMD_CVTING2
1491#undef _GLIBCXX_SIMD_CVTING3
1492#undef _GLIBCXX_SIMD_MATH_CALL_
1493#undef _GLIBCXX_SIMD_MATH_CALL2_
1494#undef _GLIBCXX_SIMD_MATH_CALL3_
1495
1496_GLIBCXX_SIMD_END_NAMESPACE
1497
1498#endif // __cplusplus >= 201703L
1499#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1500
1501// vim: foldmethod=marker sw=2 ts=8 noet sts=2
complex< _Tp > sin(const complex< _Tp > &)
Return complex sine of z.
Definition complex:1125
_Tp abs(const complex< _Tp > &)
Return magnitude of z.
Definition complex:896
complex< _Tp > cos(const complex< _Tp > &)
Return complex cosine of z.
Definition complex:1007
complex< _Tp > sqrt(const complex< _Tp > &)
Return complex square root of z.
Definition complex:1199
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition type_traits:2785
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition type_traits:2555
__gnu_cxx::__promote< _Tp >::__type sph_bessel(unsigned int __n, _Tp __x)
Definition specfun.h:1100
__gnu_cxx::__promote< _Tp >::__type legendre(unsigned int __l, _Tp __x)
Definition specfun.h:1005
__gnu_cxx::__promote< _Tp >::__type sph_neumann(unsigned int __n, _Tp __x)
Definition specfun.h:1191
__gnu_cxx::__promote< _Tp >::__type assoc_laguerre(unsigned int __n, unsigned int __m, _Tp __x)
Definition specfun.h:250
__gnu_cxx::__promote< _Tp >::__type sph_legendre(unsigned int __l, unsigned int __m, _Tp __theta)
Definition specfun.h:1147
__gnu_cxx::__promote< _Tp >::__type hermite(unsigned int __n, _Tp __x)
Definition specfun.h:916
__gnu_cxx::__promote< _Tp >::__type laguerre(unsigned int __n, _Tp __x)
Definition specfun.h:960
__gnu_cxx::__promote< _Tp >::__type assoc_legendre(unsigned int __l, unsigned int __m, _Tp __x)
Definition specfun.h:296