[Ada] Reduce rounding overhead in sin/cos/tan functions on x86

Tue Apr 25 09:43:00 GMT 2017

The trigonometric functions of children of Ada.Numerics are implemented by
inline assembly statements on the x86 architecture, and for sin/cos/tan
a special range reduction algorithm is used to avoid a loss of accuracy in
range reduction implemented in hardware on x86 processors.

This algorithm contains a rounding step and it was implemented inefficiently
by a call to a routine of the runtime.  This patch changes it to using a more
efficient inline sequence of machine instructions instead.

The same change is applied to the range reduction algorithm used prior to
calling the libc routines on PowerPC/Darwin.

The patch also changes the definition of the Double type used to interface
the libc routines in other implementations so as to use the built-in type
corresponding to the C type (Long_Float, except for Long_Long_Float on x86).

Compiling the following package at -O2 -gnatpgn must yield no calls to the
rounding routines of the runtime on x86:

with Ada.Numerics.Generic_Elementary_Functions;

package P is new Ada.Numerics.Generic_Elementary_Functions (Long_Float);

Tested on x86_64-pc-linux-gnu, committed on trunk

2017-04-25  Eric Botcazou  <ebotcazou@adacore.com>

	* a-numaux.ads: Fix description of a-numaux-darwin
	and a-numaux-x86.
	(Double): Define to Long_Float.
	* a-numaux-vxworks.ads (Double): Likewise.
	* a-numaux-darwin.ads
	(Double): Likewise.
	* a-numaux-libc-x86.ads (Double): Define to Long_Long_Float.
	* a-numaux-x86.ads: Fix package description.
	* a-numaux-x86.adb (Is_Nan): Minor tweak.
	(Reduce): Adjust and complete description. Call Is_Nan instead of
	testing manually. Use an integer temporary to hold rounded value.
	* a-numaux-darwin.adb (Reduce): Likewise.
	(Is_Nan): New function.

-------------- next part --------------
Index: a-numaux-vxworks.ads
===================================================================

--- a-numaux-vxworks.ads	(revision 247135)
+++ a-numaux-vxworks.ads	(working copy)
@@ -36,7 +36,7 @@
 package Ada.Numerics.Aux is
    pragma Pure;
 
-   type Double is digits 15;
+   type Double is new Long_Float;
    --  Type Double is the type used to call the C routines
 
    --  We import these functions directly from C. Note that we label them
Index: a-numaux-x86.adb
===================================================================
--- a-numaux-x86.adb	(revision 247135)
+++ a-numaux-x86.adb	(working copy)
@@ -49,8 +49,11 @@
    --  for values of Y in the open interval (-0.25, 0.25)
 
    procedure Reduce (X : in out Double; Q : out Natural);
-   --  Implements reduction of X by Pi/2. Q is the quadrant of the final
-   --  result in the range 0 .. 3. The absolute value of X is at most Pi.
+   --  Implement reduction of X by Pi/2. Q is the quadrant of the final
+   --  result in the range 0..3. The absolute value of X is at most Pi/4.
+   --  It is needed to avoid a loss of accuracy for sin near Pi and cos
+   --  near Pi/2 due to the use of an insufficiently precise value of Pi
+   --  in the range reduction.
 
    pragma Inline (Is_Nan);
    pragma Inline (Reduce);
@@ -117,7 +120,7 @@
    begin
       --  The IEEE NaN values are the only ones that do not equal themselves
 
-      return not (X = X);
+      return X /= X;
    end Is_Nan;
 
    ---------
@@ -154,32 +157,36 @@
       P5 : constant Double := Double'Leading_Part (Half_Pi - P1 - P2 - P3
                                                                  - P4, HM);
       P6 : constant Double := Double'Model (Half_Pi - P1 - P2 - P3 - P4 - P5);
-      K  : Double := X * Two_Over_Pi;
+      K  : Double;
+      R  : Integer;
+
    begin
-      --  For X < 2.0**32, all products below are computed exactly.
+      --  For X < 2.0**HM, all products below are computed exactly.
       --  Due to cancellation effects all subtractions are exact as well.
       --  As no double extended floating-point number has more than 75
       --  zeros after the binary point, the result will be the correctly
       --  rounded result of X - K * (Pi / 2.0).
 
+      K := X * Two_Over_Pi;
       while abs K >= 2.0**HM loop
          K := K * M - (K * M - K);
-         X := (((((X - K * P1) - K * P2) - K * P3)
-                     - K * P4) - K * P5) - K * P6;
+         X :=
+           (((((X - K * P1) - K * P2) - K * P3) - K * P4) - K * P5) - K * P6;
          K := X * Two_Over_Pi;
       end loop;
 
-      if K /= K then
+      --  If K is not a number (because X was not finite) raise exception
 
-         --  K is not a number, because X was not finite
-
+      if Is_Nan (K) then
          raise Constraint_Error;
       end if;
 
-      K := Double'Rounding (K);
-      Q := Integer (K) mod 4;
-      X := (((((X - K * P1) - K * P2) - K * P3)
-                  - K * P4) - K * P5) - K * P6;
+      --  Go through an integer temporary so as to use machine instructions
+
+      R := Integer (Double'Rounding (K));
+      Q := R mod 4;
+      K := Double (R);
+      X := (((((X - K * P1) - K * P2) - K * P3) - K * P4) - K * P5) - K * P6;
    end Reduce;
 
    ----------
Index: a-numaux-x86.ads
===================================================================
--- a-numaux-x86.ads	(revision 247135)
+++ a-numaux-x86.ads	(working copy)
@@ -30,7 +30,8 @@
 --                                                                          --
 ------------------------------------------------------------------------------
 
---  Version for the x86, using 64-bit IEEE format with inline asm statements
+--  This version is for the x86 using the 80-bit x86 long double format with
+--  inline asm statements.
 
 package Ada.Numerics.Aux is
    pragma Pure;
Index: a-numaux-darwin.adb
===================================================================
--- a-numaux-darwin.adb	(revision 247135)
+++ a-numaux-darwin.adb	(working copy)
@@ -36,11 +36,17 @@
    -- Local subprograms --
    -----------------------
 
+   function Is_Nan (X : Double) return Boolean;
+   --  Return True iff X is a IEEE NaN value
+
    procedure Reduce (X : in out Double; Q : out Natural);
-   --  Implements reduction of X by Pi/2. Q is the quadrant of the final
-   --  result in the range 0 .. 3. The absolute value of X is at most Pi/4.
+   --  Implement reduction of X by Pi/2. Q is the quadrant of the final
+   --  result in the range 0..3. The absolute value of X is at most Pi/4.
+   --  It is needed to avoid a loss of accuracy for sin near Pi and cos
+   --  near Pi/2 due to the use of an insufficiently precise value of Pi
+   --  in the range reduction.
 
-   --  The following three functions implement Chebishev approximations
+   --  The following two functions implement Chebishev approximations
    --  of the trigonometric functions in their reduced domain.
    --  These approximations have been computed using Maple.
 
@@ -51,6 +57,10 @@
    pragma Inline (Sine_Approx);
    pragma Inline (Cosine_Approx);
 
+   -------------------
+   -- Cosine_Approx --
+   -------------------
+
    function Cosine_Approx (X : Double) return Double is
       XX : constant Double := X * X;
    begin
@@ -63,6 +73,10 @@
               - 16#3.655E64869ECCE#E-14 + 1.0;
    end Cosine_Approx;
 
+   -----------------
+   -- Sine_Approx --
+   -----------------
+
    function Sine_Approx (X : Double) return Double is
       XX : constant Double := X * X;
    begin
@@ -75,6 +89,17 @@
    end Sine_Approx;
 
    ------------
+   -- Is_Nan --
+   ------------
+
+   function Is_Nan (X : Double) return Boolean is
+   begin
+      --  The IEEE NaN values are the only ones that do not equal themselves
+
+      return X /= X;
+   end Is_Nan;
+
+   ------------
    -- Reduce --
    ------------
 
@@ -92,6 +117,7 @@
                                                                  - P4, HM);
       P6 : constant Double := Double'Model (Half_Pi - P1 - P2 - P3 - P4 - P5);
       K  : Double;
+      R  : Integer;
 
    begin
       --  For X < 2.0**HM, all products below are computed exactly.
@@ -101,7 +127,7 @@
       --  rounded result of X - K * (Pi / 2.0).
 
       K := X * Two_Over_Pi;
-      while abs K >= 2.0 ** HM loop
+      while abs K >= 2.0**HM loop
          K := K * M - (K * M - K);
          X :=
            (((((X - K * P1) - K * P2) - K * P3) - K * P4) - K * P5) - K * P6;
@@ -110,14 +136,16 @@
 
       --  If K is not a number (because X was not finite) raise exception
 
-      if K /= K then
+      if Is_Nan (K) then
          raise Constraint_Error;
       end if;
 
-      K := Double'Rounding (K);
-      Q := Integer (K) mod 4;
-      X := (((((X - K * P1) - K * P2) - K * P3)
-                  - K * P4) - K * P5) - K * P6;
+      --  Go through an integer temporary so as to use machine instructions
+
+      R := Integer (Double'Rounding (K));
+      Q := R mod 4;
+      K := Double (R);
+      X := (((((X - K * P1) - K * P2) - K * P3) - K * P4) - K * P5) - K * P6;
    end Reduce;
 
    ---------
Index: a-numaux-darwin.ads
===================================================================
--- a-numaux-darwin.ads	(revision 247135)
+++ a-numaux-darwin.ads	(working copy)
@@ -39,7 +39,7 @@
 
    pragma Linker_Options ("-lm");
 
-   type Double is digits 15;
+   type Double is new Long_Float;
    --  Type Double is the type used to call the C routines
 
    --  The following functions have been implemented in Ada, since
Index: a-numaux.ads
===================================================================
--- a-numaux.ads	(revision 247135)
+++ a-numaux.ads	(working copy)
@@ -40,9 +40,10 @@
 --  This version here is for use with normal Unix math functions. Alternative
 --  versions are provided for special situations:
 
---    a-numaux-darwin    For OS/X (special handling of sin/cos for accuracy)
+--    a-numaux-darwin    For PowerPC/Darwin (special handling of sin/cos)
 --    a-numaux-libc-x86  For the x86, using 80-bit long double format
---    a-numaux-x86       For the x86, using 64-bit IEEE (inline asm statements)
+--    a-numaux-x86       For the x86, using 80-bit long double format with
+--                       inline asm statements
 --    a-numaux-vxworks   For use on VxWorks (where we have no libm.a library)
 
 package Ada.Numerics.Aux is
@@ -50,7 +51,7 @@
 
    pragma Linker_Options ("-lm");
 
-   type Double is digits 15;
+   type Double is new Long_Float;
    --  Type Double is the type used to call the C routines
 
    --  We import these functions directly from C. Note that we label them
Index: a-numaux-libc-x86.ads
===================================================================
--- a-numaux-libc-x86.ads	(revision 247135)
+++ a-numaux-libc-x86.ads	(working copy)
@@ -37,7 +37,7 @@
 
    pragma Linker_Options ("-lm");
 
-   type Double is digits 18;
+   type Double is new Long_Long_Float;
 
    --  We import these functions directly from C. Note that we label them
    --  all as pure functions, because indeed all of them are in fact pure.