src/float/transcendental/cl_F_atanhx.cc

   1 // atanhx().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8 #include "cl_F.h"
   9 #include "cln/lfloat.h"
  10 #include "cl_LF.h"
  11
  12
  13 // Implementation.
  14
  15 #include "cln/float.h"
  16 #include "cl_low.h"
  17
  18 #undef MAYBE_INLINE
  19 #define MAYBE_INLINE inline
  20 #include "cl_LF_zerop.cc"
  21 #include "cl_LF_minusp.cc"
  22 #include "cl_LF_exponent.cc"
  23
  24 namespace cln {
  25
  26 // cl_F atanhx (const cl_F& x)
  27 // cl_LF atanhx (const cl_LF& x)
  28 //
  29 // Methode:
  30 // e := Exponent aus (decode-float x), d := (float-digits x)
  31 // Bei x=0.0 oder e<=-d/2 liefere x
  32 //   (denn bei e<=-d/2 ist x^2 < 2^(-d), also
  33 //   1 <= atanh(x)/x = 1+x^2/3+x^4/5+... < 1+x^2/2 < 1+2^(-d-1) < 1+2^(-d),
  34 //   also ist atanh(x)/x, auf d Bits gerundet, gleich 1.0).
  35 // Bei großem d verwende die Formel ln((1+x)/(1-x))/2 (asymptotisch schneller),
  36 //   aber erhöhe die Genauigkeit, so daß beim Bilden von 1+x keine Bits verloren
  37 //   gehen.
  38 // Bei e<=-sqrt(d) verwende die Potenzreihe
  39 //   atanh(x)/x = sum(j=0..inf,(x^2)^j/(2j+1)):
  40 //   a:=x^2, b:=1, i:=1, sum:=0,
  41 //   while (/= sum (setq sum (+ sum (/ b i)))) do i:=i+2, b:=b*a.
  42 //   Ergebnis x*sum.
  43 // Sonst setze y := x/(1+sqrt(1-x^2)), berechne rekursiv z:=atanh(y)
  44 //   und liefere 2*z = (scale-float z 1).
  45 // Diese Rekursion wird entrekursiviert. Statt k mal hintereinander
  46 //   x := x/(1+sqrt(1-x^2)) zu bilden, arbeitet man lieber mit den Kehrwerten,
  47 //   setzt also x := 1/|x|, dann k mal x := x+sqrt(x^2-1), dann x := +- 1/x.
  48 // Aufwand: asymptotisch d^2.5 .
  49
  50 const cl_LF atanhx (const cl_LF& x)
  51 {
  52         if (zerop(x))
  53                 return x;
  54         var uintC actuallen = TheLfloat(x)->len;
  55         var uintC d = float_digits(x);
  56         var sintL e = float_exponent(x);
  57         if (e <= (sintC)(-d)>>1) // e <= -d/2 <==> e <= -ceiling(d/2)
  58                 return x; // ja -> x als Ergebnis
  59         if (actuallen >= 34) {
  60                 DeclareType(cl_LF,x);
  61                 var cl_LF xx = extend(x,TheLfloat(x)->len+ceiling((uintL)(-e),intDsize));
  62                 return cl_float(scale_float(ln((1+xx)/(1-xx)),-1),x);
  63         }
  64         var uintL k = 0; // Rekursionszähler k:=0
  65         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
  66         // angewandt werden. limit_slope = 1.0 ist schlecht (ca. 15% zu
  67         // schlecht). Ein guter Wert ist:
  68         // für naive1: limit_scope = 0.625 = 5/8,
  69         // für naive2: limit_scope = 0.4 = 13/32.
  70         var uintL sqrt_d = floor(isqrt(d)*13,32); // limit_slope*floor(sqrt(d))
  71         var cl_LF xx = x;
  72         if (e >= (sintL)(-sqrt_d)) {
  73                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
  74                 var sintL e_limit = 1+sqrt_d; // 1+limit_slope*floor(sqrt(d))
  75                 xx = recip(abs(xx)); // 1/|x|
  76                 do {
  77                   // nächstes x nach der Formel x := x+sqrt(x^2 - 1) berechnen:
  78                   xx = sqrt(square(xx) + cl_float(-1,xx)) + xx;
  79                   k = k+1;
  80                 } until (float_exponent(xx) > e_limit);
  81                 // Schleifenende mit Exponent(x) > 1+limit_slope*floor(sqrt(d)),
  82                 // also x >= 2^(1+limit_slope*floor(sqrt(d))),
  83                 // also 1/x <= 2^(-1-limit_slope*floor(sqrt(d))).
  84                 // Nun kann die Potenzreihe auf 1/x angewandt werden.
  85                 xx = recip(xx);
  86                 if (minusp(x))
  87                         xx = - xx; // Vorzeichen wieder rein
  88         }
  89         // Potenzreihe anwenden:
  90         var int i = 1;
  91         var cl_LF a = square(xx); // a = x^2
  92         var cl_LF b = cl_float(1,xx); // b := (float 1 x)
  93         var cl_LF sum = cl_float(0,xx); // sum := (float 0 x)
  94         if (0) {
  95                 // naive1:
  96                 // floating-point representation
  97                 loop {
  98                         var cl_LF new_sum = sum + b/(cl_I)i; // (+ sum (/ b i))
  99                         if (new_sum == sum) // = sum ?
 100                                 break; // ja -> Potenzreihe abbrechen
 101                         sum = new_sum;
 102                         b = b*a;
 103                         i = i+2;
 104                 }
 105         } else {
 106                 // naive2:
 107                 // floating-point representation with smooth precision reduction
 108                 var cl_LF eps = scale_float(b,-(sintC)d-10);
 109                 loop {
 110                         var cl_LF new_sum = sum + LF_to_LF(b/(cl_I)i,actuallen); // (+ sum (/ b i))
 111                         if (new_sum == sum) // = sum ?
 112                                 break; // ja -> Potenzreihe abbrechen
 113                         sum = new_sum;
 114                         b = cl_LF_shortenwith(b,eps);
 115                         b = b*a;
 116                         i = i+2;
 117                 }
 118         }
 119         var cl_LF erg = sum*xx; // sum*x als Ergebnis
 120         return scale_float(erg,k); // wegen Rekursion noch mal 2^k
 121 }
 122 // Bit complexity (N = length(x)): O(log(N)^2*M(N)).
 123
 124 const cl_F atanhx (const cl_F& x)
 125 {
 126         if (longfloatp(x)) {
 127                 DeclareType(cl_LF,x);
 128                 return atanhx(x);
 129         }
 130         if (zerop(x))
 131                 return x;
 132         var uintC d = float_digits(x);
 133         var sintL e = float_exponent(x);
 134         if (e <= (sintC)(-d)>>1) // e <= -d/2 <==> e <= -ceiling(d/2)
 135                 return x; // ja -> x als Ergebnis
 136         var uintL k = 0; // Rekursionszähler k:=0
 137         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
 138         // angewandt werden. limit_slope = 1.0 ist schlecht (ca. 15% zu
 139         // schlecht). Ein guter Wert ist limit_scope = 0.625 = 5/8.
 140         var uintL sqrt_d = floor(isqrt(d)*5,8); // limit_slope*floor(sqrt(d))
 141         var cl_F xx = x;
 142         if (e >= (sintL)(-sqrt_d)) {
 143                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
 144                 var sintL e_limit = 1+sqrt_d; // 1+limit_slope*floor(sqrt(d))
 145                 xx = recip(abs(xx)); // 1/|x|
 146                 do {
 147                   // nächstes x nach der Formel x := x+sqrt(x^2 - 1) berechnen:
 148                   xx = sqrt(square(xx) + cl_float(-1,xx)) + xx;
 149                   k = k+1;
 150                 } until (float_exponent(xx) > e_limit);
 151                 // Schleifenende mit Exponent(x) > 1+limit_slope*floor(sqrt(d)),
 152                 // also x >= 2^(1+limit_slope*floor(sqrt(d))),
 153                 // also 1/x <= 2^(-1-limit_slope*floor(sqrt(d))).
 154                 // Nun kann die Potenzreihe auf 1/x angewandt werden.
 155                 xx = recip(xx);
 156                 if (minusp(x))
 157                         xx = - xx; // Vorzeichen wieder rein
 158         }
 159         // Potenzreihe anwenden:
 160         var int i = 1;
 161         var cl_F a = square(xx); // a = x^2
 162         var cl_F b = cl_float(1,xx); // b := (float 1 x)
 163         var cl_F sum = cl_float(0,xx); // sum := (float 0 x)
 164         loop {
 165                 var cl_F new_sum = sum + b / (cl_I)i; // (+ sum (/ b i))
 166                 if (new_sum == sum) // = sum ?
 167                         break; // ja -> Potenzreihe abbrechen
 168                 sum = new_sum;
 169                 b = b*a;
 170                 i = i+2;
 171         }
 172         var cl_F erg = sum*xx; // sum*x als Ergebnis
 173         return scale_float(erg,k); // wegen Rekursion noch mal 2^k
 174 }
 175 // Bit complexity (N = length(x)): O(log(N)^2*M(N)).
 176
 177 // Timings of the above algorithms, on an i486 33 MHz, running Linux,
 178 // applied to x = sqrt(2)-1 = 0.414...
 179 //   N      naive1  naive2  use ln
 180 //   10     0.013   0.013   0.015
 181 //   25     0.064   0.050   0.049
 182 //   50     0.25    0.018   0.17
 183 //  100     1.07    0.75    0.64
 184 //  250     7.6     5.2     2.7
 185 //  500    35.5    24.2     9.7
 186 // 1000   168     116      29.6
 187 // ==> using ln faster for N >= 34.
 188
 189 }  // namespace cln