src/float/transcendental/cl_F_expx.cc

   1 // expx().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/float.h"
  13 #include "cl_low.h"
  14 #include "cl_F.h"
  15 #include "cln/lfloat.h"
  16 #include "cl_LF.h"
  17 #include "cln/integer.h"
  18
  19 #include "cl_inline.h"
  20 #include "cl_LF_zerop.cc"
  21 #include "cl_LF_exponent.cc"
  22
  23 namespace cln {
  24
  25 // cl_F expx_naive (const cl_F& x)
  26 // cl_LF expx_naive (const cl_LF& x)
  27 //
  28 // Methode:
  29 // e := Exponent aus (decode-float x), d := (float-digits x)
  30 // Bei x=0.0 oder e<-d liefere 1.0
  31 //   (denn bei e<=-d-1 ist abs(exp(x)-1) = abs(x)+O(x^2) < 2^(-d-1),
  32 //    also ist exp(x), auf d Bits gerundet, gleich 1.0).
  33 // Bei e<=-sqrt(d) verwende die Potenzreihe
  34 //   exp(x) = sum(j=0..inf,x^j/j!):
  35 //   b:=1, i:=0, sum:=0,
  36 //   while (/= sum (setq sum (+ sum b))) do b:=b*x/(i+1), i:=i+1.
  37 //   Ergebnis sum.
  38 // Sonst setze y := x/2 = (scale-float x -1),
  39 //   berechne rekursiv z:=exp(y) und liefere z^2.
  40 // Aufwand: asymptotisch d^2.5 .
  41
  42 const cl_LF expx_naive (const cl_LF& x)
  43 {
  44 // Methode:
  45 // wie oben, mit adaptiver Genauigkeit während der Potenzreihen-Summation.
  46         if (zerop_inline(x))
  47                 return cl_float(1,x);
  48         var uintC actuallen = TheLfloat(x)->len;
  49         var uintC d = float_digits(x);
  50         var sintE e = float_exponent_inline(x);
  51         if (e < -(sintC)d) // e < -d ?
  52                 return cl_float(1,x); // ja -> 1.0 als Ergebnis
  53  {      Mutable(cl_LF,x);
  54         var uintE k = 0; // Rekursionszähler k:=0
  55         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
  56         // angewandt werden. limit_slope = 1.0 ist nicht schlecht,
  57         // auch im Bereich d = ca. 800.
  58         var sintL e_limit = -1-isqrtC(d); // -1-floor(sqrt(d))
  59         if (e > e_limit) {
  60                 // e > -1-floor(sqrt(d)) -> muß |x| verkleinern.
  61                 k = e - e_limit;
  62                 x = scale_float(x,-(sintE)k); // x := x/2^k
  63                 // Neuer Exponent = e-k = e_limit.
  64         }
  65         // Potenzreihe anwenden:
  66         var int i = 0;
  67         var cl_LF b = cl_float(1,x); // b := (float 1 x)
  68         var cl_LF eps = scale_float(b,-(sintC)d-10);
  69         var cl_LF sum = cl_float(0,x); // sum := (float 0 x)
  70         loop {
  71                 var cl_LF new_sum = sum + LF_to_LF(b,actuallen);
  72                 if (new_sum == sum) // = sum ?
  73                         break; // ja -> Potenzreihe abbrechen
  74                 sum = new_sum;
  75                 i = i+1;
  76                 b = cl_LF_shortenwith(b,eps);
  77                 b = (b*x)/(cl_I)i; // b := b*x/i
  78         }
  79         var cl_LF& result = sum; // sum als Ergebnis
  80         // Wegen Rekursion noch k mal quadrieren:
  81         for ( ; k > 0; k--)
  82                 result = square(result);
  83         return result;
  84 }}
  85 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
  86
  87 const cl_F expx_naive (const cl_F& x)
  88 {
  89         if (longfloatp(x)) {
  90                 DeclareType(cl_LF,x);
  91                 return expx_naive(x);
  92         }
  93         if (zerop(x))
  94                 return cl_float(1,x);
  95         var uintC d = float_digits(x);
  96         var sintE e = float_exponent(x);
  97         if (e < -(sintC)d) // e < -d ?
  98                 return cl_float(1,x); // ja -> 1.0 als Ergebnis
  99  {      Mutable(cl_F,x);
 100         var uintE k = 0; // Rekursionszähler k:=0
 101         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
 102         // angewandt werden. limit_slope = 1.0 ist nicht schlecht. Für
 103         // d > 1600 scheint der Bereich 2.0 <= limit_slope <= 2.6 am besten
 104         // zu sein (mit bis zu 15% Beschleunigung gegenüber limit_slope = 1.0),
 105         // aber in diesem Bereich rechnen wir gar nicht.
 106         // Wir wählen limit_slope = 1.5.
 107         var sintL e_limit = -1-floor(isqrtC(d)*3,2); // -1-floor(sqrt(d))
 108         if (e > e_limit) {
 109                 // e > -1-floor(sqrt(d)) -> muß |x| verkleinern.
 110                 k = e - e_limit;
 111                 x = scale_float(x,-(sintE)k); // x := x/2^k
 112                 // Neuer Exponent = e-k = e_limit.
 113         }
 114         // Potenzreihe anwenden:
 115         var int i = 0;
 116         var cl_F b = cl_float(1,x); // b := (float 1 x)
 117         var cl_F sum = cl_float(0,x); // sum := (float 0 x)
 118         loop {
 119                 var cl_F new_sum = sum + b;
 120                 if (new_sum == sum) // = sum ?
 121                         break; // ja -> Potenzreihe abbrechen
 122                 sum = new_sum;
 123                 i = i+1;
 124                 b = (b*x)/(cl_I)i; // b := b*x/i
 125         }
 126         var cl_F& result = sum; // sum als Ergebnis
 127         // Wegen Rekursion noch k mal quadrieren:
 128         for ( ; k > 0; k--)
 129                 result = square(result);
 130         return result;
 131 }}
 132 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
 133
 134 const cl_LF expx_ratseries (const cl_LF& x)
 135 {
 136         // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
 137         //  Wiley 1987. Section 10.2.3]
 138         var uintC len = TheLfloat(x)->len;
 139         var cl_idecoded_float x_ = integer_decode_float(x);
 140         // x = (-1)^sign * 2^exponent * mantissa
 141         var uintE lq = cl_I_to_UE(- x_.exponent);
 142         var const cl_I& p = x_.mantissa;
 143         // Compute exp(p/2^lq) by splitting into pieces.
 144         // Each piece gives rise to a factor exp(pk/2^lqk).
 145         // Instead of the standard choice lqk = 2^k, we choose
 146         // lqk = c^k + O(1), where c > 1 is real.
 147         // Running time on Linux i486, 33 Mhz, computing exp(sqrt(2)-1):
 148         //   c  2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5
 149         //  (a) 400 393 390 377 371 360 363 367 367 358 362 362 363 362 376 372
 150         //  (b) 311 317 305 312 295 291 286 293 291 284 295 284 293 287 288 305
 151         // (a): N=300, time in 0.01 sec. (b): N=1000, time in 0.1 sec.
 152         // Values 2.5 <= c <= 3.2 seem best. Let's choose c = 2.875.
 153         var bool first_factor = true;
 154         var cl_LF product;
 155         var uintE b1;
 156         var uintE b2;
 157         for (b1 = 0, b2 = 1; b1 < lq; b1 = b2, b2 = ceiling(b2*23,8)) {
 158                 // Piece containing bits b1+1..b2 after "decimal point"
 159                 // in the binary representation of (p/2^lq).
 160                 var uintE lqk = (lq >= b2 ? b2 : lq);
 161                 var cl_I pk = ldb(p,cl_byte(lqk-b1,lq-lqk));
 162                 // Compute exp(pk/2^lqk).
 163                 if (!zerop(pk)) {
 164                         if (minusp(x_.sign)) { pk = -pk; }
 165                         var cl_LF factor = cl_exp_aux(pk,lqk,len);
 166                         if (first_factor) {
 167                                 product = factor;
 168                                 first_factor = false;
 169                         } else
 170                                 product = product * factor;
 171                 }
 172         }
 173         if (first_factor)
 174                 return cl_I_to_LF(1,len);
 175         else
 176                 return product;
 177 }
 178 // Bit complexity (N = length(x)): O(log(N)^2*M(N)).
 179
 180 // Timings of the above algorithms, on an i486 33 MHz, running Linux,
 181 // applied to x = sqrt(2)-1 = 0.414...
 182 // ("naive" with adaptive limit_slope, about sqrt(ln(len)).)
 183 //   N      naive  ratseries
 184 //   10     0.010   0.027
 185 //   25     0.039   0.072
 186 //   50     0.15    0.19
 187 //  100     0.60    0.55
 188 //  250     3.9     2.6
 189 //  500    16.3     9.3
 190 // 1000    68      29
 191 // ==> ratseries faster for N >= 84.
 192
 193 }  // namespace cln