src/float/transcendental/cl_F_atanx.cc

   1 // atanx().
   2
   3 // General includes.
   4 #include "base/cl_sysdep.h"
   5
   6 // Specification.
   7 #include "float/transcendental/cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/float.h"
  13 #include "base/cl_low.h"
  14 #include "float/cl_F.h"
  15 #include "cln/lfloat.h"
  16 #include "float/lfloat/cl_LF.h"
  17 #include "cln/integer.h"
  18
  19 #include "base/cl_inline.h"
  20 #include "float/lfloat/elem/cl_LF_zerop.cc"
  21 #include "float/lfloat/elem/cl_LF_minusp.cc"
  22 #include "float/lfloat/misc/cl_LF_exponent.cc"
  23
  24 namespace cln {
  25
  26 // cl_F atanx_naive (const cl_F& x)
  27 // cl_LF atanx_naive (const cl_LF& x)
  28 //
  29 // Methode:
  30 // e := Exponent aus (decode-float x), d := (float-digits x)
  31 // Bei x=0.0 oder e<=-d/2 liefere x
  32 //   (denn bei e<=-d/2 ist x^2/3 < x^2/2 < 2^(-d)/2 = 2^(-d-1), also
  33 //   1 >= atan(x)/x > 1-x^2/3 > 1-2^(-d-1),
  34 //   also ist atan(x)/x, auf d Bits gerundet, gleich 1.0).
  35 // Bei e<=-sqrt(d) verwende die Potenzreihe
  36 //   atan(x)/x = sum(j=0..inf,(-x^2)^j/(2j+1)):
  37 //   a:=-x^2, b:=1, i:=1, sum:=0,
  38 //   while (/= sum (setq sum (+ sum (/ b i)))) do i:=i+2, b:=b*a.
  39 //   Ergebnis x*sum.
  40 // Sonst setze y := x/(1+sqrt(1+x^2)), berechne rekursiv z:=atan(y)
  41 //   und liefere 2*z = (scale-float z 1).
  42 // Diese Rekursion wird entrekursiviert. Statt k mal hintereinander
  43 //   x := x/(1+sqrt(1+x^2)) zu bilden, arbeitet man lieber mit den Kehrwerten,
  44 //   setzt also x := 1/|x|, dann k mal x := x+sqrt(x^2+1), dann x := +- 1/x.
  45 // Aufwand: asymptotisch d^2.5 .
  46
  47 static const cl_LF atanx_naive (const cl_LF& x)
  48 {
  49         if (zerop_inline(x))
  50                 return x;
  51         var uintC actuallen = TheLfloat(x)->len;
  52         var uintC d = float_digits(x);
  53         var sintE e = float_exponent_inline(x);
  54         if (e <= (sintC)(-d)>>1) // e <= -d/2 <==> e <= -ceiling(d/2)
  55                 return x; // ja -> x als Ergebnis
  56         var uintL k = 0; // Rekursionszähler k:=0
  57         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
  58         // angewandt werden. limit_slope = 1.0 ist schlecht (ca. 20% zu
  59         // schlecht). Ein guter Wert ist:
  60         // Für naive1: limit_scope = 0.5.
  61         // Für naive2: limit_scope = 0.375 (ca. 0.5 für kleine len, 0.35 für
  62         // große len).
  63         var uintL sqrt_d = floor(isqrtC(d)*3,8); // limit_slope*floor(sqrt(d))
  64         var cl_LF xx = x;
  65         if (e >= (sintL)(-sqrt_d)) {
  66                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
  67                 var sintL e_limit = 1+sqrt_d; // 1+limit_slope*floor(sqrt(d))
  68                 xx = recip(abs(xx)); // 1/|x|
  69                 do {
  70                   // nächstes x nach der Formel x := x+sqrt(x^2 + 1) berechnen:
  71                   xx = sqrt(square(xx) + cl_float(1,xx)) + xx;
  72                   k = k+1;
  73                 } until (float_exponent_inline(xx) > e_limit);
  74                 // Schleifenende mit Exponent(x) > 1+limit_slope*floor(sqrt(d)),
  75                 // also x >= 2^(1+limit_slope*floor(sqrt(d))),
  76                 // also 1/x <= 2^(-1-limit_slope*floor(sqrt(d))).
  77                 // Nun kann die Potenzreihe auf 1/x angewandt werden.
  78                 xx = recip(xx);
  79                 if (minusp_inline(x))
  80                         xx = - xx; // Vorzeichen wieder rein
  81         }
  82         // Potenzreihe anwenden:
  83         var int i = 1;
  84         var cl_LF a = - square(xx); // a = - x^2
  85         var cl_LF b = cl_float(1,xx); // b := (float 1 x)
  86         var cl_LF sum = cl_float(0,xx); // sum := (float 0 x)
  87         if (0) {
  88                 // naive1:
  89                 // floating-point representation
  90                 loop {
  91                         var cl_LF new_sum = sum + b / (cl_I)i; // (+ sum (/ b i))
  92                         if (new_sum == sum) // = sum ?
  93                                 break; // ja -> Potenzreihe abbrechen
  94                         sum = new_sum;
  95                         b = b*a;
  96                         i = i+2;
  97                 }
  98         } else {
  99                 // naive2:
 100                 // floating-point representation with smooth precision reduction
 101                 var cl_LF eps = scale_float(b,-(sintC)d-10);
 102                 loop {
 103                         var cl_LF new_sum = sum + LF_to_LF(b/(cl_I)i,actuallen); // (+ sum (/ b i))
 104                         if (new_sum == sum) // = sum ?
 105                                 break; // ja -> Potenzreihe abbrechen
 106                         sum = new_sum;
 107                         b = cl_LF_shortenwith(b,eps);
 108                         b = b*a;
 109                         i = i+2;
 110                 }
 111         }
 112         var cl_LF erg = sum*xx; // sum*x als Ergebnis
 113         return scale_float(erg,k); // wegen Rekursion noch mal 2^k
 114 }
 115 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
 116
 117 static const cl_F atanx_naive (const cl_F& x)
 118 {
 119         if (zerop(x))
 120                 return x;
 121         var uintC d = float_digits(x);
 122         var sintE e = float_exponent(x);
 123         if (e <= (sintC)(-d)>>1) // e <= -d/2 <==> e <= -ceiling(d/2)
 124                 return x; // ja -> x als Ergebnis
 125         var uintL k = 0; // Rekursionszähler k:=0
 126         var uintL sqrt_d = floor(isqrtC(d),2); // limit_slope*floor(sqrt(d))
 127         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
 128         // angewandt werden. limit_slope = 1.0 ist schlecht (ca. 20% zu
 129         // schlecht). Ein guter Wert ist limit_scope = 0.5.
 130         var cl_F xx = x;
 131         if (e >= (sintL)(-sqrt_d)) {
 132                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
 133                 var sintL e_limit = 1+sqrt_d; // 1+limit_slope*floor(sqrt(d))
 134                 xx = recip(abs(xx)); // 1/|x|
 135                 do {
 136                   // nächstes x nach der Formel x := x+sqrt(x^2 + 1) berechnen:
 137                   xx = sqrt(square(xx) + cl_float(1,xx)) + xx;
 138                   k = k+1;
 139                 } until (float_exponent(xx) > e_limit);
 140                 // Schleifenende mit Exponent(x) > 1+limit_slope*floor(sqrt(d)),
 141                 // also x >= 2^(1+limit_slope*floor(sqrt(d))),
 142                 // also 1/x <= 2^(-1-limit_slope*floor(sqrt(d))).
 143                 // Nun kann die Potenzreihe auf 1/x angewandt werden.
 144                 xx = recip(xx);
 145                 if (minusp(x))
 146                         xx = - xx; // Vorzeichen wieder rein
 147         }
 148         // Potenzreihe anwenden:
 149         var int i = 1;
 150         var cl_F a = - square(xx); // a = - x^2
 151         var cl_F b = cl_float(1,xx); // b := (float 1 x)
 152         var cl_F sum = cl_float(0,xx); // sum := (float 0 x)
 153         loop {
 154                 var cl_F new_sum = sum + b / (cl_I)i; // (+ sum (/ b i))
 155                 if (new_sum == sum) // = sum ?
 156                         break; // ja -> Potenzreihe abbrechen
 157                 sum = new_sum;
 158                 b = b*a;
 159                 i = i+2;
 160         }
 161         var cl_F erg = sum*xx; // sum*x als Ergebnis
 162         return scale_float(erg,k); // wegen Rekursion noch mal 2^k
 163 }
 164 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
 165
 166 static const cl_LF atanx_ratseries (const cl_LF& t)
 167 {
 168         // Method:
 169         // Based on the same ideas as lnx_ratseries.
 170         //   e := exponent of (decode-float t), d := (float-digits t).
 171         //   If t=0.0 or e<=-d/2, return t.
 172         // (x,y) := (1/sqrt(1+t^2),t/sqrt(1+t^2)), z := 0.
 173         // Loop
 174         //   [(x+i*y)*exp(i*z) is invariant, x>0, sqrt(x^2+y^2)=1]
 175         //   e := exponent of (decode-float y), d := (float-digits y).
 176         //   If y=0.0 or e<=-d/2, return z+y
 177         //   (because if e<=-d/2 then |y|^3/6 < 2^(-d)/2*|y|, and since
 178         //   asin(y) = y+y^3/6+..., asin(y) rounded to d bits is = y).
 179         //   Choose approximation z' of angle(x+i*y):
 180         //     If |y| >= 1/2, set z' = 1/2 * sign(y).
 181         //     If |y| < 2^-n with n maximal, set
 182         //       z' = truncate(y*2^(2n))/2^(2n).
 183         //   Set z := z + z' and x+i*y := (x+i*y)*exp(-i*z').
 184         var uintC len = TheLfloat(t)->len;
 185         var uintC d = intDsize*len;
 186         if (zerop_inline(t) || (float_exponent_inline(t) <= (sintC)(-d)>>1))
 187                 return t;
 188         var cl_LF x = recip(sqrt(cl_I_to_LF(1,len) + square(t)));
 189         var cl_LF y = t*x;
 190         var cl_LF z = cl_I_to_LF(0,len);
 191         loop {
 192                 if (zerop_inline(y) || (float_exponent_inline(y) <= (sintC)(-d)>>1))
 193                         break;
 194                 var cl_idecoded_float y_ = integer_decode_float(y);
 195                 // y = (-1)^sign * 2^exponent * mantissa
 196                 var uintC lm = integer_length(y_.mantissa);
 197                 var uintE me = cl_I_to_UE(- y_.exponent);
 198                 var cl_I p;
 199                 var uintE lq;
 200                 var bool last_step = false;
 201                 if (lm >= me) { // |y| >= 1/2 ?
 202                         p = y_.sign; // 1 or -1
 203                         lq = 1;
 204                 } else {
 205                         var uintE n = me - lm; // |y| < 2^-n with n maximal
 206                         // Set p to the first n bits of |y|:
 207                         if (lm > n) {
 208                                 p = y_.mantissa >> (lm - n);
 209                                 lq = 2*n;
 210                         } else {
 211                                 p = y_.mantissa;
 212                                 lq = lm + n;
 213                         }
 214                         if (minusp(y_.sign)) { p = -p; }
 215                         // If 2*n >= lm = intDsize*len, then within our
 216                         // precision exp(-i*z')=1-i*z' (because |z'^2| < 2^-lm),
 217                         // and we know a priori that the iteration will stop
 218                         // after the next big multiplication. This saves one
 219                         // big multiplication at the end.
 220                         if (2*n >= lm)
 221                                 last_step = true;
 222                 }
 223                 z = z + scale_float(cl_I_to_LF(p,len),-(sintE)lq);
 224                 if (last_step)
 225                         break;
 226                 var cl_LF_cos_sin_t cis_z = cl_cossin_aux(-p,lq,len);
 227                 var cl_LF new_x = x*cis_z.cos - y*cis_z.sin;
 228                 var cl_LF new_y = x*cis_z.sin + y*cis_z.cos;
 229                 x = new_x; y = new_y;
 230         }
 231         return z+y;
 232 }
 233 // Bit complexity (N = length(x)): O(log(N)^2*M(N)).
 234
 235 // Timings of the above algorithms, on an i486 33 MHz, running Linux,
 236 // applied to x = sqrt(2)-1 = 0.414...
 237 //   N      naive1  naive2  ratseries
 238 //   10     0.013   0.013   0.043
 239 //   25     0.062   0.048   0.122
 240 //   50     0.25    0.17    0.34
 241 //  100     1.06    0.70    1.07
 242 //  250     7.5     5.0     5.6
 243 //  500    34.7    23.2    20.0
 244 // 1000   167     112      65
 245 // ==> ratseries faster for N >= 325.
 246
 247 const cl_F CL_FLATTEN atanx (const cl_F& x)
 248 {
 249         if (longfloatp(x)) {
 250                 DeclareType(cl_LF,x);
 251                 if (TheLfloat(x)->len >= 325)
 252                         return cl_float(atanx_ratseries(extend(x,TheLfloat(x)->len+1)),x);
 253                 else
 254                         return atanx_naive(x);
 255         } else
 256                 return atanx_naive(x);
 257 }
 258 // Bit complexity (N = length(x)): O(log(N)^2*M(N)).
 259
 260 }  // namespace cln