]>
Commit | Line | Data |
---|---|---|
5fee5ec3 IB |
1 | // Written in the D programming language. |
2 | ||
3 | /** | |
4 | This is a submodule of $(MREF std, math). | |
5 | ||
6 | It contains hardware support for floating point numbers. | |
7 | ||
8 | Copyright: Copyright The D Language Foundation 2000 - 2011. | |
9 | License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). | |
10 | Authors: $(HTTP digitalmars.com, Walter Bright), Don Clugston, | |
11 | Conversion of CEPHES math library to D by Iain Buclaw and David Nadlinger | |
12 | Source: $(PHOBOSSRC std/math/hardware.d) | |
13 | */ | |
14 | ||
15 | /* NOTE: This file has been patched from the original DMD distribution to | |
16 | * work with the GDC compiler. | |
17 | */ | |
18 | module std.math.hardware; | |
19 | ||
20 | static import core.stdc.fenv; | |
21 | ||
22 | version (X86) version = X86_Any; | |
23 | version (X86_64) version = X86_Any; | |
24 | version (PPC) version = PPC_Any; | |
25 | version (PPC64) version = PPC_Any; | |
26 | version (MIPS32) version = MIPS_Any; | |
27 | version (MIPS64) version = MIPS_Any; | |
28 | version (AArch64) version = ARM_Any; | |
29 | version (ARM) version = ARM_Any; | |
30 | version (S390) version = IBMZ_Any; | |
31 | version (SPARC) version = SPARC_Any; | |
32 | version (SPARC64) version = SPARC_Any; | |
33 | version (SystemZ) version = IBMZ_Any; | |
34 | version (RISCV32) version = RISCV_Any; | |
35 | version (RISCV64) version = RISCV_Any; | |
36 | ||
37 | version (D_InlineAsm_X86) version = InlineAsm_X86_Any; | |
38 | version (D_InlineAsm_X86_64) version = InlineAsm_X86_Any; | |
39 | ||
40 | version (InlineAsm_X86_Any) version = InlineAsm_X87; | |
41 | version (InlineAsm_X87) | |
42 | { | |
43 | static assert(real.mant_dig == 64); | |
44 | version (CRuntime_Microsoft) version = InlineAsm_X87_MSVC; | |
45 | } | |
46 | ||
47 | version (X86_64) version = StaticallyHaveSSE; | |
48 | version (X86) version (OSX) version = StaticallyHaveSSE; | |
49 | ||
50 | version (StaticallyHaveSSE) | |
51 | { | |
52 | private enum bool haveSSE = true; | |
53 | } | |
54 | else version (X86) | |
55 | { | |
56 | static import core.cpuid; | |
57 | private alias haveSSE = core.cpuid.sse; | |
58 | } | |
59 | ||
60 | version (D_SoftFloat) | |
61 | { | |
62 | // Some soft float implementations may support IEEE floating flags. | |
63 | // The implementation here supports hardware flags only, so it is currently | |
64 | // available only on the supported hardware targets listed below. | |
65 | } | |
66 | else version (X86_Any) version = IeeeFlagsSupport; | |
67 | else version (PPC_Any) version = IeeeFlagsSupport; | |
68 | else version (RISCV_Any) version = IeeeFlagsSupport; | |
69 | else version (MIPS_Any) version = IeeeFlagsSupport; | |
70 | else version (ARM_Any) version = IeeeFlagsSupport; | |
71 | ||
72 | // Struct FloatingPointControl is only available if hardware FP units are available. | |
73 | version (D_HardFloat) | |
74 | { | |
75 | // FloatingPointControl.clearExceptions() depends on version IeeeFlagsSupport | |
76 | version (IeeeFlagsSupport) version = FloatingPointControlSupport; | |
77 | } | |
78 | ||
79 | version (GNU) | |
80 | { | |
81 | // The compiler can unexpectedly rearrange floating point operations and | |
82 | // access to the floating point status flags when optimizing. This means | |
83 | // ieeeFlags tests cannot be reliably checked in optimized code. | |
84 | // See https://github.com/ldc-developers/ldc/issues/888 | |
85 | } | |
86 | else | |
87 | { | |
88 | version = IeeeFlagsUnittest; | |
89 | version = FloatingPointControlUnittest; | |
90 | } | |
91 | ||
92 | version (IeeeFlagsSupport) | |
93 | { | |
94 | ||
95 | /** IEEE exception status flags ('sticky bits') | |
96 | ||
97 | These flags indicate that an exceptional floating-point condition has occurred. | |
98 | They indicate that a NaN or an infinity has been generated, that a result | |
99 | is inexact, or that a signalling NaN has been encountered. If floating-point | |
100 | exceptions are enabled (unmasked), a hardware exception will be generated | |
101 | instead of setting these flags. | |
102 | */ | |
103 | struct IeeeFlags | |
104 | { | |
105 | nothrow @nogc: | |
106 | ||
107 | private: | |
108 | // The x87 FPU status register is 16 bits. | |
109 | // The Pentium SSE2 status register is 32 bits. | |
110 | // The ARM and PowerPC FPSCR is a 32-bit register. | |
111 | // The SPARC FSR is a 32-bit register (64 bits for SPARC 7 & 8, but high bits are uninteresting). | |
112 | // The RISC-V (32 & 64 bit) fcsr is a 32-bit register. | |
113 | uint flags; | |
114 | ||
115 | version (CRuntime_Microsoft) | |
116 | { | |
117 | // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv). | |
118 | // Applies to both the x87 status word (16 bits) and the SSE2 status word (32 bits). | |
119 | enum : int | |
120 | { | |
121 | INEXACT_MASK = 0x20, | |
122 | UNDERFLOW_MASK = 0x10, | |
123 | OVERFLOW_MASK = 0x08, | |
124 | DIVBYZERO_MASK = 0x04, | |
125 | INVALID_MASK = 0x01, | |
126 | ||
127 | EXCEPTIONS_MASK = 0b11_1111 | |
128 | } | |
129 | // Don't bother about subnormals, they are not supported on most CPUs. | |
130 | // SUBNORMAL_MASK = 0x02; | |
131 | } | |
132 | else | |
133 | { | |
134 | enum : int | |
135 | { | |
136 | INEXACT_MASK = core.stdc.fenv.FE_INEXACT, | |
137 | UNDERFLOW_MASK = core.stdc.fenv.FE_UNDERFLOW, | |
138 | OVERFLOW_MASK = core.stdc.fenv.FE_OVERFLOW, | |
139 | DIVBYZERO_MASK = core.stdc.fenv.FE_DIVBYZERO, | |
140 | INVALID_MASK = core.stdc.fenv.FE_INVALID, | |
141 | EXCEPTIONS_MASK = core.stdc.fenv.FE_ALL_EXCEPT, | |
142 | } | |
143 | } | |
144 | ||
145 | static uint getIeeeFlags() @trusted pure /* Reads the current hardware floating-point status flags; targets without a branch below end in assert(0). */ | |
146 | { | |
147 | version (GNU) /* GCC-style extended inline assembly */ | |
148 | { | |
149 | version (X86_Any) | |
150 | { | |
151 | ushort sw; /* receives the x87 status word stored by fstsw */ | |
152 | asm pure nothrow @nogc | |
153 | { | |
154 | "fstsw %0" : "=a" (sw); | |
155 | } | |
156 | // Merge in the SSE status register (MXCSR) so that both units' flags are reported. | |
157 | if (haveSSE) | |
158 | { | |
159 | uint mxcsr; | |
160 | asm pure nothrow @nogc | |
161 | { | |
162 | "stmxcsr %0" : "=m" (mxcsr); | |
163 | } | |
164 | return (sw | mxcsr) & EXCEPTIONS_MASK; /* keep only the exception flag bits */ | |
165 | } | |
166 | else | |
167 | return sw & EXCEPTIONS_MASK; | |
168 | } | |
169 | else version (ARM) | |
170 | { | |
171 | version (ARM_SoftFloat) | |
172 | return 0; /* no hardware status register is read under soft float */ | |
173 | else | |
174 | { | |
175 | uint result = void; | |
176 | asm pure nothrow @nogc | |
177 | { | |
178 | "vmrs %0, FPSCR; and %0, %0, #0x1F;" : "=r" (result); | |
179 | } | |
180 | return result; /* FPSCR masked down to its low five bits by the asm above */ | |
181 | } | |
182 | } | |
183 | else version (RISCV_Any) | |
184 | { | |
185 | version (D_SoftFloat) | |
186 | return 0; | |
187 | else | |
188 | { | |
189 | uint result = void; | |
190 | asm pure nothrow @nogc | |
191 | { | |
192 | "frflags %0" : "=r" (result); | |
193 | } | |
194 | return result; | |
195 | } | |
196 | } | |
197 | else | |
198 | assert(0, "Not yet supported"); | |
199 | } | |
200 | else /* other compilers: D-style inline assembly */ | |
201 | version (InlineAsm_X86_Any) | |
202 | { | |
203 | ushort sw; | |
204 | asm pure nothrow @nogc { fstsw sw; } | |
205 | ||
206 | // Merge in the SSE status register (MXCSR) so that both units' flags are reported. | |
207 | if (haveSSE) | |
208 | { | |
209 | uint mxcsr; | |
210 | asm pure nothrow @nogc { stmxcsr mxcsr; } | |
211 | return (sw | mxcsr) & EXCEPTIONS_MASK; | |
212 | } | |
213 | else return sw & EXCEPTIONS_MASK; | |
214 | } | |
215 | else version (SPARC) | |
216 | { | |
5eb9927a | 217 | /* |
5fee5ec3 IB |
218 | int retval; |
219 | asm pure nothrow @nogc { st %fsr, retval; } | |
220 | return retval; | |
221 | */ | |
5eb9927a | 222 | assert(0, "Not yet supported"); |
5fee5ec3 IB |
223 | } |
224 | else version (ARM) | |
225 | { | |
226 | assert(false, "Not yet supported."); | |
227 | } | |
228 | else version (RISCV_Any) | |
229 | { | |
230 | mixin(` | |
231 | uint result = void; | |
232 | asm pure nothrow @nogc | |
233 | { | |
234 | "frflags %0" : "=r" (result); | |
235 | } | |
236 | return result; | |
237 | `); | |
238 | } | |
239 | else | |
240 | assert(0, "Not yet supported"); | |
241 | } | |
242 | ||
243 | static void resetIeeeFlags() @trusted /* Clears the hardware floating-point exception flags; targets without a branch below end in assert(0). */ | |
244 | { | |
245 | version (GNU) /* GCC-style extended inline assembly */ | |
246 | { | |
247 | version (X86_Any) | |
248 | { | |
249 | asm nothrow @nogc | |
250 | { | |
251 | "fnclex"; | |
252 | } | |
253 | ||
254 | // Also clear the exception flags held in MXCSR, the SSE control/status register. | |
255 | if (haveSSE) | |
256 | { | |
257 | uint mxcsr; | |
258 | asm nothrow @nogc | |
259 | { | |
260 | "stmxcsr %0" : "=m" (mxcsr); | |
261 | } | |
262 | mxcsr &= ~EXCEPTIONS_MASK; /* read-modify-write: every bit outside EXCEPTIONS_MASK is kept */ | |
263 | asm nothrow @nogc | |
264 | { | |
265 | "ldmxcsr %0" : : "m" (mxcsr); | |
266 | } | |
267 | } | |
268 | } | |
269 | else version (ARM) | |
270 | { | |
271 | version (ARM_SoftFloat) | |
272 | return; | |
273 | else | |
274 | { | |
275 | uint old = FloatingPointControl.getControlState(); | |
276 | old &= ~0b11111; // clear the low five bits, keep the rest; see http://infocenter.arm.com/help/topic/com.arm.doc.ddi0408i/Chdfifdc.html | |
277 | asm nothrow @nogc | |
278 | { | |
279 | "vmsr FPSCR, %0" : : "r" (old); | |
280 | } | |
281 | } | |
282 | } | |
283 | else version (RISCV_Any) | |
284 | { | |
285 | version (D_SoftFloat) | |
286 | return; | |
287 | else | |
288 | { | |
289 | uint newValues = 0x0; /* all flags cleared */ | |
290 | asm nothrow @nogc | |
291 | { | |
292 | "fsflags %0" : : "r" (newValues); | |
293 | } | |
294 | } | |
295 | } | |
296 | else | |
297 | assert(0, "Not yet supported"); | |
298 | } | |
299 | else /* other compilers: D-style inline assembly */ | |
300 | version (InlineAsm_X86_Any) | |
301 | { | |
302 | asm nothrow @nogc | |
303 | { | |
304 | fnclex; | |
305 | } | |
306 | ||
307 | // Also clear the exception flags held in MXCSR, the SSE control/status register. | |
308 | if (haveSSE) | |
309 | { | |
310 | uint mxcsr; | |
311 | asm nothrow @nogc { stmxcsr mxcsr; } | |
312 | mxcsr &= ~EXCEPTIONS_MASK; | |
313 | asm nothrow @nogc { ldmxcsr mxcsr; } | |
314 | } | |
315 | } | |
316 | else version (RISCV_Any) | |
317 | { | |
318 | mixin(` | |
319 | uint newValues = 0x0; | |
320 | asm pure nothrow @nogc | |
321 | { | |
322 | "fsflags %0" : : "r" (newValues); | |
323 | } | |
324 | `); | |
325 | } | |
326 | else | |
327 | { | |
328 | /* SPARC (disabled sketch, kept for reference): | |
329 | int tmpval; | |
330 | asm pure nothrow @nogc { st %fsr, tmpval; } | |
331 | tmpval &=0xFFFF_FC00; | |
332 | asm pure nothrow @nogc { ld tmpval, %fsr; } | |
333 | */ | |
334 | assert(0, "Not yet supported"); | |
335 | } | |
336 | } | |
337 | ||
338 | public: | |
339 | /** | |
340 | * True if the inexact flag is set: a result could not be represented exactly, so rounding occurred. | |
341 | * Example: `x = sin(0.1);` | |
342 | */ | |
343 | @property bool inexact() @safe const { return cast(bool) (flags & INEXACT_MASK); } | |
344 | ||
345 | /** | |
346 | * True if the underflow flag is set, e.g. a zero was generated by underflow. | |
347 | * Example: `x = real.min*real.epsilon/2;` | |
348 | */ | |
349 | @property bool underflow() @safe const { return cast(bool) (flags & UNDERFLOW_MASK); } | |
350 | ||
351 | /** | |
352 | * True if the overflow flag is set, e.g. an infinity was generated by overflow. | |
353 | * Example: `x = real.max*2;` | |
354 | */ | |
355 | @property bool overflow() @safe const { return cast(bool) (flags & OVERFLOW_MASK); } | |
356 | ||
357 | /** | |
358 | * True if the division-by-zero flag is set, e.g. an infinity was generated by dividing by zero. | |
359 | * Example: `x = 3/0.0;` | |
360 | */ | |
361 | @property bool divByZero() @safe const { return cast(bool) (flags & DIVBYZERO_MASK); } | |
362 | ||
363 | /** | |
364 | * True if the invalid-operation flag is set, e.g. a machine NaN was generated. | |
365 | * Example: `x = real.infinity * 0.0;` | |
366 | */ | |
367 | @property bool invalid() @safe const { return cast(bool) (flags & INVALID_MASK); } | |
368 | } | |
369 | ||
370 | /// | |
371 | version (IeeeFlagsUnittest) | |
372 | @safe unittest | |
373 | { | |
374 | import std.math.traits : isNaN; | |
375 | ||
376 | static void func() { /* integer-only work, used below to show the flags stay unchanged */ | |
377 | int a = 10 * 10; | |
378 | } | |
379 | pragma(inline, false) static void blockopt(ref real x) {} | |
380 | real a = 3.5; | |
381 | // Start from a clean state: set all the flags to zero. | |
382 | resetIeeeFlags(); | |
383 | assert(!ieeeFlags.divByZero); | |
384 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
385 | // Perform a division by zero; this must raise the divByZero flag. | |
386 | a /= 0.0L; | |
387 | assert(a == real.infinity); | |
388 | assert(ieeeFlags.divByZero); | |
389 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
390 | // Create a NaN (infinity * 0); this must raise the invalid flag. | |
391 | a *= 0.0L; | |
392 | assert(ieeeFlags.invalid); | |
393 | assert(isNaN(a)); | |
394 | ||
395 | // Check that calling func() has no effect on the | |
396 | // status flags: the snapshots taken before and after must compare equal. | |
397 | IeeeFlags f = ieeeFlags; | |
398 | func(); | |
399 | assert(ieeeFlags == f); | |
400 | } | |
401 | ||
402 | version (IeeeFlagsUnittest) | |
403 | @safe unittest | |
404 | { | |
405 | import std.meta : AliasSeq; | |
406 | ||
407 | static struct Test /* pairs an operation with the flag query it is expected to turn on */ | |
408 | { | |
409 | void delegate() @trusted action; | |
410 | bool function() @trusted ieeeCheck; | |
411 | } | |
412 | ||
413 | static foreach (T; AliasSeq!(float, double, real)) | |
414 | {{ | |
415 | T x; /* Must be declared here to defeat -O: the optimizer would remove the | |
416 | calculations if x were local to the function literals. */ | |
417 | auto tests = [ | |
418 | Test( | |
419 | () { x = 1; x += 0.1L; }, | |
420 | () => ieeeFlags.inexact | |
421 | ), | |
422 | Test( | |
423 | () { x = T.min_normal; x /= T.max; }, | |
424 | () => ieeeFlags.underflow | |
425 | ), | |
426 | Test( | |
427 | () { x = T.max; x += T.max; }, | |
428 | () => ieeeFlags.overflow | |
429 | ), | |
430 | Test( | |
431 | () { x = 1; x /= 0; }, | |
432 | () => ieeeFlags.divByZero | |
433 | ), | |
434 | Test( | |
435 | () { x = 0; x /= 0; }, | |
436 | () => ieeeFlags.invalid | |
437 | ) | |
438 | ]; | |
439 | foreach (test; tests) | |
440 | { | |
441 | resetIeeeFlags(); /* each case starts with all flags cleared */ | |
442 | assert(!test.ieeeCheck()); | |
443 | test.action(); | |
444 | assert(test.ieeeCheck()); /* the flag must be set once the action has run */ | |
445 | } | |
446 | }} | |
447 | } | |
448 | ||
449 | /// Set all of the floating-point status flags to false (forwards to `IeeeFlags.resetIeeeFlags`). | |
450 | void resetIeeeFlags() @trusted nothrow @nogc | |
451 | { | |
452 | IeeeFlags.resetIeeeFlags(); | |
453 | } | |
454 | ||
455 | /// | |
456 | @safe unittest | |
457 | { | |
458 | pragma(inline, false) static void blockopt(ref real x) {} | |
459 | resetIeeeFlags(); | |
460 | real a = 3.5; | |
461 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
462 | a /= 0.0L; | |
463 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
464 | assert(a == real.infinity); | |
465 | assert(ieeeFlags.divByZero); | |
466 | ||
467 | resetIeeeFlags(); /* the flag raised above must be gone after a reset */ | |
468 | assert(!ieeeFlags.divByZero); | |
469 | } | |
470 | ||
471 | /// Returns: a snapshot of the current floating-point status flags, wrapped in an `IeeeFlags` value | |
472 | @property IeeeFlags ieeeFlags() @trusted pure nothrow @nogc | |
473 | { | |
474 | return IeeeFlags(IeeeFlags.getIeeeFlags()); | |
475 | } | |
476 | ||
477 | /// | |
478 | @safe nothrow unittest | |
479 | { | |
480 | import std.math.traits : isNaN; | |
481 | ||
482 | pragma(inline, false) static void blockopt(ref real x) {} | |
483 | resetIeeeFlags(); | |
484 | real a = 3.5; | |
485 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
486 | ||
487 | a /= 0.0L; /* division by zero */ | |
488 | assert(a == real.infinity); | |
489 | assert(ieeeFlags.divByZero); | |
490 | blockopt(a); // opaque call so the optimizer cannot constant-propagate `a` | |
491 | ||
492 | a *= 0.0L; /* infinity * 0 yields a NaN */ | |
493 | assert(isNaN(a)); | |
494 | assert(ieeeFlags.invalid); | |
495 | } | |
496 | ||
497 | } // IeeeFlagsSupport | |
498 | ||
499 | ||
500 | version (FloatingPointControlSupport) | |
501 | { | |
502 | ||
503 | /** Control the Floating point hardware | |
504 | ||
505 | Change the IEEE754 floating-point rounding mode and the floating-point | |
506 | hardware exceptions. | |
507 | ||
508 | By default, the rounding mode is roundToNearest and all hardware exceptions | |
509 | are disabled. For most applications, debugging is easier if the $(I division | |
510 | by zero), $(I overflow), and $(I invalid operation) exceptions are enabled. | |
511 | These three are combined into a $(I severeExceptions) value for convenience. | |
512 | Note in particular that if $(I invalidException) is enabled, a hardware trap | |
513 | will be generated whenever an uninitialized floating-point variable is used. | |
514 | ||
515 | All changes are temporary. The previous state is restored at the | |
516 | end of the scope. | |
517 | ||
518 | ||
519 | Example: | |
520 | ---- | |
521 | { | |
522 | FloatingPointControl fpctrl; | |
523 | ||
524 | // Enable hardware exceptions for division by zero, overflow to infinity, | |
525 | // invalid operations, and uninitialized floating-point variables. | |
526 | fpctrl.enableExceptions(FloatingPointControl.severeExceptions); | |
527 | ||
528 | // This will generate a hardware exception, if x is a | |
529 | // default-initialized floating point variable: | |
530 | real x; // Add `= 0` or even `= real.nan` to not throw the exception. | |
531 | real y = x * 3.0; | |
532 | ||
533 | // The exception is only generated for the NaN of a default-initialized variable. | |
534 | // NaNs with other payloads are valid: | |
535 | real z = y * real.nan; // ok | |
536 | ||
537 | // The hardware exceptions and rounding mode set above are restored to their | |
538 | // previous state when leaving this scope. | |
539 | } | |
540 | ---- | |
541 | ||
542 | */ | |
543 | struct FloatingPointControl | |
544 | { | |
545 | nothrow @nogc: | |
546 | ||
547 | alias RoundingMode = uint; /// | |
548 | ||
549 | version (StdDdoc) | |
550 | { | |
551 | enum : RoundingMode | |
552 | { | |
553 | /** IEEE rounding modes. | |
554 | * The default mode is roundToNearest. | |
555 | * | |
556 | * roundingMask = A mask of all rounding modes. | |
557 | */ | |
558 | roundToNearest, | |
559 | roundDown, /// ditto | |
560 | roundUp, /// ditto | |
561 | roundToZero, /// ditto | |
562 | roundingMask, /// ditto | |
563 | } | |
564 | } | |
565 | else version (CRuntime_Microsoft) | |
566 | { | |
567 | // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv). | |
568 | enum : RoundingMode | |
569 | { | |
570 | roundToNearest = 0x0000, | |
571 | roundDown = 0x0400, | |
572 | roundUp = 0x0800, | |
573 | roundToZero = 0x0C00, | |
574 | roundingMask = roundToNearest | roundDown | |
575 | | roundUp | roundToZero, | |
576 | } | |
577 | } | |
578 | else | |
579 | { | |
580 | enum : RoundingMode | |
581 | { | |
582 | roundToNearest = core.stdc.fenv.FE_TONEAREST, | |
583 | roundDown = core.stdc.fenv.FE_DOWNWARD, | |
584 | roundUp = core.stdc.fenv.FE_UPWARD, | |
585 | roundToZero = core.stdc.fenv.FE_TOWARDZERO, | |
586 | roundingMask = roundToNearest | roundDown | |
587 | | roundUp | roundToZero, | |
588 | } | |
589 | } | |
590 | ||
591 | /*** | |
592 | * Change the floating-point hardware rounding mode | |
593 | * | |
594 | * Changing the rounding mode in the middle of a function can interfere | |
595 | * with optimizations of floating point expressions, as the optimizer assumes | |
596 | * that the rounding mode does not change. | |
597 | * It is best to change the rounding mode only at the | |
598 | * beginning of the function, and keep it until the function returns. | |
599 | * It is also best to add the line: | |
600 | * --- | |
601 | * pragma(inline, false); | |
602 | * --- | |
603 | * as the first line of the function so it will not get inlined. | |
604 | * Params: | |
605 | * newMode = the new rounding mode | |
606 | */ | |
607 | @property void rounding(RoundingMode newMode) @trusted | |
608 | { | |
609 | initialize(); /* remember the pre-existing control state on first use */ | |
610 | setControlState((newMode & roundingMask) | (getControlState() & ~roundingMask)); | |
611 | } | |
612 | ||
613 | /// Returns: the rounding mode currently active, i.e. the `roundingMask` bits of the control state | |
614 | @property static RoundingMode rounding() @trusted pure | |
615 | { | |
616 | return cast(RoundingMode)(roundingMask & getControlState()); | |
617 | } | |
618 | ||
619 | alias ExceptionMask = uint; /// | |
620 | ||
621 | version (StdDdoc) | |
622 | { | |
623 | enum : ExceptionMask | |
624 | { | |
625 | /** IEEE hardware exceptions. | |
626 | * By default, all exceptions are masked (disabled). | |
627 | * | |
628 | * severeExceptions = The overflow, division by zero, and invalid | |
629 | * exceptions. | |
630 | */ | |
631 | subnormalException, | |
632 | inexactException, /// ditto | |
633 | underflowException, /// ditto | |
634 | overflowException, /// ditto | |
635 | divByZeroException, /// ditto | |
636 | invalidException, /// ditto | |
637 | severeExceptions, /// ditto | |
638 | allExceptions, /// ditto | |
639 | } | |
640 | } | |
641 | else version (ARM_Any) | |
642 | { | |
643 | enum : ExceptionMask | |
644 | { | |
645 | subnormalException = 0x8000, | |
646 | inexactException = 0x1000, | |
647 | underflowException = 0x0800, | |
648 | overflowException = 0x0400, | |
649 | divByZeroException = 0x0200, | |
650 | invalidException = 0x0100, | |
651 | severeExceptions = overflowException | divByZeroException | |
652 | | invalidException, | |
653 | allExceptions = severeExceptions | underflowException | |
654 | | inexactException | subnormalException, | |
655 | } | |
656 | } | |
657 | else version (PPC_Any) | |
658 | { | |
659 | enum : ExceptionMask | |
660 | { | |
661 | inexactException = 0x0008, | |
662 | divByZeroException = 0x0010, | |
663 | underflowException = 0x0020, | |
664 | overflowException = 0x0040, | |
665 | invalidException = 0x0080, | |
666 | severeExceptions = overflowException | divByZeroException | |
667 | | invalidException, | |
668 | allExceptions = severeExceptions | underflowException | |
669 | | inexactException, | |
670 | } | |
671 | } | |
672 | else version (RISCV_Any) | |
673 | { | |
674 | enum : ExceptionMask | |
675 | { | |
676 | inexactException = 0x01, | |
d0e4bdcd GM |
677 | divByZeroException = 0x08, |
678 | underflowException = 0x02, | |
679 | overflowException = 0x04, | |
5fee5ec3 IB |
680 | invalidException = 0x10, |
681 | severeExceptions = overflowException | divByZeroException | |
682 | | invalidException, | |
683 | allExceptions = severeExceptions | underflowException | |
684 | | inexactException, | |
685 | } | |
686 | } | |
687 | else version (HPPA) | |
688 | { | |
689 | enum : ExceptionMask | |
690 | { | |
691 | inexactException = 0x01, | |
692 | underflowException = 0x02, | |
693 | overflowException = 0x04, | |
694 | divByZeroException = 0x08, | |
695 | invalidException = 0x10, | |
696 | severeExceptions = overflowException | divByZeroException | |
697 | | invalidException, | |
698 | allExceptions = severeExceptions | underflowException | |
699 | | inexactException, | |
700 | } | |
701 | } | |
702 | else version (MIPS_Any) | |
703 | { | |
704 | enum : ExceptionMask | |
705 | { | |
706 | inexactException = 0x0080, | |
707 | divByZeroException = 0x0400, | |
708 | overflowException = 0x0200, | |
709 | underflowException = 0x0100, | |
710 | invalidException = 0x0800, | |
711 | severeExceptions = overflowException | divByZeroException | |
712 | | invalidException, | |
713 | allExceptions = severeExceptions | underflowException | |
714 | | inexactException, | |
715 | } | |
716 | } | |
717 | else version (SPARC_Any) | |
718 | { | |
719 | enum : ExceptionMask | |
720 | { | |
721 | inexactException = 0x0800000, | |
722 | divByZeroException = 0x1000000, | |
723 | overflowException = 0x4000000, | |
724 | underflowException = 0x2000000, | |
725 | invalidException = 0x8000000, | |
726 | severeExceptions = overflowException | divByZeroException | |
727 | | invalidException, | |
728 | allExceptions = severeExceptions | underflowException | |
729 | | inexactException, | |
730 | } | |
731 | } | |
732 | else version (IBMZ_Any) | |
733 | { | |
734 | enum : ExceptionMask | |
735 | { | |
736 | inexactException = 0x08000000, | |
737 | divByZeroException = 0x40000000, | |
738 | overflowException = 0x20000000, | |
739 | underflowException = 0x10000000, | |
740 | invalidException = 0x80000000, | |
741 | severeExceptions = overflowException | divByZeroException | |
742 | | invalidException, | |
743 | allExceptions = severeExceptions | underflowException | |
744 | | inexactException, | |
745 | } | |
746 | } | |
747 | else version (X86_Any) | |
748 | { | |
749 | enum : ExceptionMask | |
750 | { | |
751 | inexactException = 0x20, | |
752 | underflowException = 0x10, | |
753 | overflowException = 0x08, | |
754 | divByZeroException = 0x04, | |
755 | subnormalException = 0x02, | |
756 | invalidException = 0x01, | |
757 | severeExceptions = overflowException | divByZeroException | |
758 | | invalidException, | |
759 | allExceptions = severeExceptions | underflowException | |
760 | | inexactException | subnormalException, | |
761 | } | |
762 | } | |
763 | else | |
764 | static assert(false, "Not implemented for this architecture"); | |
765 | ||
766 | version (ARM_Any) | |
767 | { | |
768 | static bool hasExceptionTraps_impl() @safe /* Probes whether an exception-enable bit written to the control register sticks. */ | |
769 | { | |
770 | auto oldState = getControlState(); /* saved so the probe can be undone below */ | |
771 | // If exception traps are not supported, the enable bit we set reads back as zero; see | |
772 | // https://sourceware.org/ml/libc-ports/2012-06/msg00091.html | |
773 | setControlState(oldState | divByZeroException); | |
774 | immutable result = (getControlState() & allExceptions) != 0; | |
775 | setControlState(oldState); /* restore the caller's control state */ | |
776 | return result; | |
777 | } | |
778 | } | |
779 | ||
780 | /// Returns: true if the current FPU supports exception trapping; always false on RISC-V, whose floating-point extensions cannot trap on exception flags | |
781 | @property static bool hasExceptionTraps() @safe pure | |
782 | { | |
783 | version (X86_Any) | |
784 | return true; | |
785 | else version (PPC_Any) | |
786 | return true; | |
787 | else version (MIPS_Any) | |
788 | return true; | |
789 | else version (RISCV_Any) | |
790 | return false; /* RISC-V only accrues flags in fcsr; answering false lets callers test for traps instead of dying in assert(0) */ | |
791 | else version (ARM_Any) | |
792 | { | |
793 | // hasExceptionTraps_impl restores all global state before returning, so it is treated as pure here. | |
794 | auto fptr = ( () @trusted => cast(bool function() @safe pure nothrow @nogc)&hasExceptionTraps_impl)(); | |
795 | return fptr(); | |
796 | } | |
797 | else | |
798 | assert(0, "Not yet supported"); | |
799 | } | |
800 | ||
801 | /// Unmask (enable) the given hardware exceptions. Several may be ORed together; bits outside `allExceptions` are ignored. | |
802 | void enableExceptions(ExceptionMask exceptions) @trusted | |
803 | { | |
804 | assert(hasExceptionTraps); | |
805 | initialize(); /* save the original control state on first use */ | |
806 | version (X86_Any) /* here the bits are masks: a cleared bit means the exception is enabled */ | |
807 | setControlState(getControlState() & (~exceptions | ~allExceptions)); | |
808 | else | |
809 | setControlState((allExceptions & exceptions) | getControlState()); | |
810 | } | |
811 | ||
812 | /// Mask (disable) the given hardware exceptions. Several may be ORed together; bits outside `allExceptions` are ignored. | |
813 | void disableExceptions(ExceptionMask exceptions) @trusted | |
814 | { | |
815 | assert(hasExceptionTraps); | |
816 | initialize(); /* save the original control state on first use */ | |
817 | version (X86_Any) /* here the bits are masks: a set bit means the exception is disabled */ | |
818 | setControlState((allExceptions & exceptions) | getControlState()); | |
819 | else | |
820 | setControlState(getControlState() & (~exceptions | ~allExceptions)); | |
821 | } | |
822 | ||
823 | /// Returns: the set of exceptions that are currently unmasked (enabled), as a combination of the `*Exception` bits | |
824 | @property static ExceptionMask enabledExceptions() @trusted pure | |
825 | { | |
826 | assert(hasExceptionTraps); | |
827 | version (X86_Any) /* invert within allExceptions: a cleared mask bit means enabled */ | |
828 | return allExceptions ^ (allExceptions & getControlState()); | |
829 | else | |
830 | return (allExceptions & getControlState()); | |
831 | } | |
832 | ||
833 | /// Clears all pending exceptions; if this instance changed the control state, the saved exception state and rounding mode are written back. | |
834 | ~this() @trusted | |
835 | { | |
836 | clearExceptions(); | |
837 | if (!initialized) return; /* nothing was changed through this instance */ | |
838 | setControlState(savedState); | |
839 | } | |
840 | ||
841 | private: | |
842 | ControlState savedState; | |
843 | ||
844 | bool initialized = false; | |
845 | ||
846 | version (ARM_Any) | |
847 | { | |
848 | alias ControlState = uint; | |
849 | } | |
850 | else version (HPPA) | |
851 | { | |
852 | alias ControlState = uint; | |
853 | } | |
854 | else version (PPC_Any) | |
855 | { | |
856 | alias ControlState = uint; | |
857 | } | |
858 | else version (RISCV_Any) | |
859 | { | |
860 | alias ControlState = uint; | |
861 | } | |
862 | else version (MIPS_Any) | |
863 | { | |
864 | alias ControlState = uint; | |
865 | } | |
866 | else version (SPARC_Any) | |
867 | { | |
868 | alias ControlState = ulong; | |
869 | } | |
870 | else version (IBMZ_Any) | |
871 | { | |
872 | alias ControlState = uint; | |
873 | } | |
874 | else version (X86_Any) | |
875 | { | |
876 | alias ControlState = ushort; | |
877 | } | |
878 | else | |
879 | static assert(false, "Not implemented for this architecture"); | |
880 | ||
881 | void initialize() @safe /* On first call, clears pending flags and records the control state that the destructor restores. */ | |
882 | { | |
883 | // BUG: This works around the absence of this() constructors, so the setup happens lazily here. | |
884 | if (initialized) return; | |
885 | clearExceptions(); | |
886 | savedState = getControlState(); | |
887 | initialized = true; | |
888 | } | |
889 | ||
890 | // Clear all pending exceptions by calling resetIeeeFlags(); compilation fails where IeeeFlagsSupport is not defined. | |
891 | static void clearExceptions() @safe | |
892 | { | |
893 | version (IeeeFlagsSupport) | |
894 | resetIeeeFlags(); | |
895 | else | |
896 | static assert(false, "Not implemented for this architecture"); | |
897 | } | |
898 | ||
899 | // Read the floating-point control register; targets without a branch below end in assert(0). | |
900 | package(std.math) static ControlState getControlState() @trusted pure | |
901 | { | |
902 | version (GNU) /* GCC-style extended inline assembly */ | |
903 | { | |
904 | version (X86_Any) | |
905 | { | |
906 | ControlState cont; /* only the x87 control word is read here (fstcw); MXCSR is not consulted */ | |
907 | asm pure nothrow @nogc | |
908 | { | |
909 | "fstcw %0" : "=m" (cont); | |
910 | } | |
911 | return cont; | |
912 | } | |
913 | else version (AArch64) | |
914 | { | |
1c5317d6 | 915 | ControlState cont; |
5fee5ec3 IB |
916 | asm pure nothrow @nogc |
917 | { | |
918 | "mrs %0, FPCR;" : "=r" (cont); | |
919 | } | |
920 | return cont; | |
921 | } | |
922 | else version (ARM) | |
923 | { | |
924 | ControlState cont; | |
925 | version (ARM_SoftFloat) | |
926 | cont = 0; /* no register is read under soft float */ | |
927 | else | |
928 | { | |
929 | asm pure nothrow @nogc | |
930 | { | |
931 | "vmrs %0, FPSCR" : "=r" (cont); | |
932 | } | |
933 | } | |
934 | return cont; | |
935 | } | |
936 | else version (RISCV_Any) | |
937 | { | |
938 | version (D_SoftFloat) | |
939 | return 0; | |
940 | else | |
941 | { | |
942 | ControlState cont; | |
943 | asm pure nothrow @nogc | |
944 | { | |
945 | "frcsr %0" : "=r" (cont); | |
946 | } | |
947 | return cont; | |
948 | } | |
949 | } | |
950 | else | |
951 | assert(0, "Not yet supported"); | |
952 | } | |
953 | else /* other compilers: D-style inline assembly */ | |
954 | version (D_InlineAsm_X86) | |
955 | { | |
956 | short cont; /* NOTE(review): declared short although ControlState is ushort on x86; relies on implicit conversion at return */ | |
957 | asm pure nothrow @nogc | |
958 | { | |
959 | xor EAX, EAX; | |
960 | fstcw cont; | |
961 | } | |
962 | return cont; | |
963 | } | |
964 | else version (D_InlineAsm_X86_64) | |
965 | { | |
966 | short cont; | |
967 | asm pure nothrow @nogc | |
968 | { | |
969 | xor RAX, RAX; | |
970 | fstcw cont; | |
971 | } | |
972 | return cont; | |
973 | } | |
974 | else version (RISCV_Any) | |
975 | { | |
976 | mixin(` | |
977 | ControlState cont; | |
978 | asm pure nothrow @nogc | |
979 | { | |
980 | "frcsr %0" : "=r" (cont); | |
981 | } | |
982 | return cont; | |
983 | `); | |
984 | } | |
985 | else | |
986 | assert(0, "Not yet supported"); | |
987 | } | |
988 | ||
// Set the control register
//
// Writes `newState` to the floating-point control register of the target
// chosen at compile time. On x86 the x87 control word is loaded first and,
// when `haveSSE` is true, its rounding-mode and exception-mask bits are
// copied into MXCSR as well. Targets without an implementation hit
// `assert(0)`.
package(std.math) static void setControlState(ControlState newState) @trusted
{
    version (GNU)
    {
        // GDC: GCC-style extended inline assembly.
        version (X86_Any)
        {
            // `fclex` clears pending x87 exception flags before the new
            // control word is loaded.
            asm nothrow @nogc { "fclex; fldcw %0" : : "m" (newState); }

            // Also update MXCSR, SSE's control register.
            if (haveSSE)
            {
                uint sseState;
                asm nothrow @nogc { "stmxcsr %0" : "=m" (sseState); }

                /* Rounding mode: bits 10-11 of the FPU control register
                   correspond to bits 13-14 of MXCSR, hence the shift by 3.
                   Drop the old mode, then merge in the new one. */
                sseState = (sseState & ~(roundingMask << 3))
                    | ((newState & roundingMask) << 3);

                /* Exception masks: bits 0-5 of the FPU control register
                   correspond to bits 7-12 of MXCSR, hence the shift by 7.
                   Drop the old masks, then merge in the new ones. */
                sseState = (sseState & ~(allExceptions << 7))
                    | ((newState & allExceptions) << 7);

                asm nothrow @nogc { "ldmxcsr %0" : : "m" (sseState); }
            }
        }
        else version (AArch64)
        {
            asm nothrow @nogc { "msr FPCR, %0;" : : "r" (newState); }
        }
        else version (ARM)
        {
            version (ARM_SoftFloat)
            {
                // Soft-float build: no register is written.
                return;
            }
            else
            {
                asm nothrow @nogc { "vmsr FPSCR, %0" : : "r" (newState); }
            }
        }
        else version (RISCV_Any)
        {
            version (D_SoftFloat)
            {
                // Soft-float build: no register is written.
                return;
            }
            else
            {
                asm nothrow @nogc { "fscsr %0" : : "r" (newState); }
            }
        }
        else
            assert(0, "Not yet supported");
    }
    else version (InlineAsm_X86_Any)
    {
        // DMD-style inline assembler; same sequence as the GNU x86 path.
        asm nothrow @nogc
        {
            fclex;
            fldcw newState;
        }

        // Also update MXCSR, SSE's control register.
        if (haveSSE)
        {
            uint sseState;
            asm nothrow @nogc { stmxcsr sseState; }

            /* Rounding mode: bits 10-11 of the FPU control register
               correspond to bits 13-14 of MXCSR, hence the shift by 3.
               Drop the old mode, then merge in the new one. */
            sseState = (sseState & ~(roundingMask << 3))
                | ((newState & roundingMask) << 3);

            /* Exception masks: bits 0-5 of the FPU control register
               correspond to bits 7-12 of MXCSR, hence the shift by 7.
               Drop the old masks, then merge in the new ones. */
            sseState = (sseState & ~(allExceptions << 7))
                | ((newState & allExceptions) << 7);

            asm nothrow @nogc { ldmxcsr sseState; }
        }
    }
    else version (RISCV_Any)
    {
        // The extended-asm syntax is wrapped in a string mixin so it is only
        // parsed when this branch is actually compiled.
        mixin(`
        asm pure nothrow @nogc
        {
            "fscsr %0" : : "r" (newState);
        }
        `);
    }
    else
        assert(0, "Not yet supported");
}
1100 | } | |
1101 | ||
///
version (FloatingPointControlUnittest)
@safe unittest
{
    import std.math.rounding : lrint;

    // One control object is reused for all three rounding modes below.
    FloatingPointControl control;

    // Toward negative infinity: 1.5 becomes 1.
    control.rounding = FloatingPointControl.roundDown;
    assert(lrint(1.5) == 1.0);

    // Toward positive infinity: 1.4 becomes 2.
    control.rounding = FloatingPointControl.roundUp;
    assert(lrint(1.4) == 2.0);

    // To nearest: 1.5 becomes 2.
    control.rounding = FloatingPointControl.roundToNearest;
    assert(lrint(1.5) == 2.0);
}
1119 | ||
@safe unittest
{
    alias FPC = FloatingPointControl;

    // Asserts that the default rounding mode is active and, where traps are
    // supported, that no exception is enabled.
    void checkDefaults()
    {
        assert(FPC.rounding == FPC.roundToNearest);
        if (FPC.hasExceptionTraps)
            assert(FPC.enabledExceptions == 0);
    }

    // An untouched control object must leave the defaults in place.
    {
        FPC untouched;
    }
    checkDefaults();

    // A changed rounding mode is visible inside the scope and gone after it.
    {
        FPC scoped;
        scoped.rounding = FPC.roundDown;
        assert(FPC.rounding == FPC.roundDown);
    }
    checkDefaults();

    // Same check with exception traps enabled, on targets that support them.
    if (FPC.hasExceptionTraps)
    {
        immutable traps = FPC.divByZeroException | FPC.overflowException;

        FPC scoped;
        scoped.enableExceptions(traps);
        assert(scoped.enabledExceptions == traps);

        scoped.rounding = FPC.roundUp;
        assert(FPC.rounding == FPC.roundUp);
    }
    checkDefaults();
}
1156 | ||
version (FloatingPointControlUnittest)
@safe unittest // rounding
{
    import std.meta : AliasSeq;

    alias FPC = FloatingPointControl;

    // Returns 1 + 0.1 evaluated in type T under rounding mode `mode`.
    static T addRound(T)(uint mode)
    {
        pragma(inline, false) static void hideFromOptimizer(ref T value) {}
        pragma(inline, false);
        FPC control;
        control.rounding = mode;
        T sum = 1;
        hideFromOptimizer(sum); // avoid constant propagation by the optimizer
        sum += 0.1L;
        return sum;
    }

    // Returns -1 - 0.1 evaluated in type T under rounding mode `mode`.
    static T subRound(T)(uint mode)
    {
        pragma(inline, false) static void hideFromOptimizer(ref T value) {}
        pragma(inline, false);
        FPC control;
        control.rounding = mode;
        T diff = -1;
        hideFromOptimizer(diff); // avoid constant propagation by the optimizer
        diff -= 0.1L;
        return diff;
    }

    static foreach (T; AliasSeq!(float, double, real))
    {{
        /* Changing the rounding mode interferes with common subexpression
         * elimination, so each mode change happens in its own non-inlined
         * function call.
         */

        // Positive result: rounding toward zero matches rounding down.
        {
            T up = addRound!T(FPC.roundUp);
            T down = addRound!T(FPC.roundDown);
            T zero = addRound!T(FPC.roundToZero);

            assert(up > down);
            assert(zero == down);
        }

        // Negative result: rounding toward zero matches rounding up.
        {
            T up = subRound!T(FPC.roundUp);
            T down = subRound!T(FPC.roundDown);
            T zero = subRound!T(FPC.roundToZero);

            assert(up > down);
            assert(zero == up);
        }
    }}
}
1212 | ||
1213 | } |