src/float/transcendental/cl_F_sinhx.cc

   1 // sinhxbyx(), sinhx().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_float.h"
  13 #include "cl_low.h"
  14 #include "cl_F.h"
  15 #include "cl_lfloat.h"
  16 #include "cl_LF.h"
  17 #include "cl_integer.h"
  18
  19 #undef MAYBE_INLINE
  20 #define MAYBE_INLINE inline
  21 #include "cl_LF_zerop.cc"
  22 #include "cl_LF_exponent.cc"
  23
  24 // sinhxbyx is mainly for cl_SF, cl_FF, cl_DF, where we want to avoid underflow.
  25
  26 const cl_F sinhxbyx_naive (const cl_F& x)
  27 {
  28 // Methode:
  29 // e := Exponent aus (decode-float x), d := (float-digits x)
  30 // Bei x=0.0 oder e<=(1-d)/2 liefere 1.0
  31 //   (denn bei e<=(1-d)/2 ist x^2/6 < x^2/4 < 2^(1-d)/4 = 2^(-d-1), also
  32 //   1 <= sinh(x)/x = 1+x^2/6+... < 1+2^(-d-1), also 1 <= (sinh(x)/x)^2 < 1+2^(-d),
  33 //   also ist (sinh(x)/x)^2, auf d Bits gerundet, gleich 1.0).
  34 // Bei e<=-sqrt(d) verwende die Potenzreihe
  35 //   sinh(x)/x = sum(j=0..inf,(x^2)^j/(2j+1)!):
  36 //   a:=x^2, b:=1, i:=1, sum:=0,
  37 //   while (/= sum (setq sum (+ sum b))) do b:=b*a/((i+1)*(i+2)), i:=i+2.
  38 //   Ergebnis sum^2.
  39 // Sonst setze y := x/2 = (scale-float x -1),
  40 //   berechne rekursiv z:=(sinh(y)/y)^2 und liefere z*(1+y^2*z).
  41 // [Die Grenze sqrt(d) ergibt sich so:
  42 //  Man braucht bei der Potenzreihe mit x=2^-k etwa j Glieder, mit
  43 //  k*j*ln 2 + j*(ln j - 1) = d, und der Aufwand beträgt etwa 2.8*(j/2)
  44 //  Multiplikationen von d-Bit-Zahlen. Bei Halbierungen bis x=2^-k ist der
  45 //  Gesamtaufwand etwa 2*(k+e)+1.4*j(k). Dieses minimieren nach k: Soll sein
  46 //  -1.4 = d/dk j(k) = (d/dj k(j))^-1 = - j^2/(d+j)*ln 2, also j^2=2(d+j),
  47 //  grob j=sqrt(2d) und damit k=sqrt(d).]
  48 // Aufwand: asymptotisch d^2.5 .
  49
  50         if (zerop(x))
  51                 return cl_float(1,x);
  52         var uintL d = float_digits(x);
  53         var sintL e = float_exponent(x);
  54         if (e <= (1-(sintL)d)>>1) // e <= (1-d)/2 <==> e <= -ceiling((d-1)/2) ?
  55                 return cl_float(1,x); // ja -> 1.0 als Ergebnis
  56  {      Mutable(cl_F,x);
  57         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
  58         // angewandt werden. Wähle limit_slope = 13/32 = 0.4.
  59         var sintL e_limit = -1-floor(isqrt(d)*13,32); // -1-floor(sqrt(d))
  60         if (e > e_limit) {
  61                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
  62                 x = scale_float(x,e_limit-e);
  63                 // Neuer Exponent = e_limit.
  64         }
  65         var cl_F x2 = square(x);        // x^2
  66         // Potenzreihe anwenden:
  67         var cl_F a = x2; // a := x^2
  68         var int i = 1;
  69         var cl_F b = cl_float(1,x); // b := (float 1 x)
  70         var cl_F sum = cl_float(0,x); // sum := (float 0 x)
  71         loop {
  72                 var cl_F new_sum = sum + b;
  73                 if (new_sum == sum) // = sum ?
  74                         break; // ja -> Potenzreihe abbrechen
  75                 sum = new_sum;
  76                 b = (b*a)/(cl_I)((i+1)*(i+2));
  77                 i = i+2;
  78         }
  79         var cl_F z = square(sum); // sum^2 als Ergebnis
  80         while (e > e_limit) {
  81                 z = z + x2 * square(z);
  82                 x2 = scale_float(x2,2); // x^2 := x^2*4
  83                 e_limit++;
  84         }
  85         return z;
  86 }}
  87 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
  88
  89 const cl_LF sinhx_naive (const cl_LF& x)
  90 {
  91 // Methode:
  92 // e := Exponent aus (decode-float x), d := (float-digits x)
  93 // Bei x=0.0 oder e<=(1-d)/2 liefere x
  94 //   (denn bei e<=(1-d)/2 ist x^2/6 < x^2/4 < 2^(1-d)/4 = 2^(-d-1), also
  95 //   1 <= sinh(x)/x = 1+x^2/6+... < 1+2^(-d-1), also ist sinh(x)^2, auf d Bits
  96 //   gerundet, gleich x).
  97 // Bei e<=-sqrt(d) verwende die Potenzreihe
  98 //   sinh(x) = sum(j=0..inf,x*(x^2)^j/(2j+1)!):
  99 //   a:=x^2, b:=x, i:=1, sum:=0,
 100 //   while (/= sum (setq sum (+ sum b))) do b:=b*a/((i+1)*(i+2)), i:=i+2.
 101 //   Ergebnis sum^2.
 102 // Sonst setze y := x/2 = (scale-float x -1),
 103 //   berechne rekursiv z:=sinh(y)^2 und liefere 4*z*(1+z) = (1+2*z)^2-1.
 104 // [Die Grenze sqrt(d) ergibt sich so:
 105 //  Man braucht bei der Potenzreihe mit x=2^-k etwa j Glieder, mit
 106 //  k*j*ln 2 + j*(ln j - 1) = d, und der Aufwand beträgt etwa 2.8*(j/2)
 107 //  Multiplikationen von d-Bit-Zahlen. Bei Halbierungen bis x=2^-k ist der
 108 //  Gesamtaufwand etwa 2*(k+e)+1.4*j(k). Dieses minimieren nach k: Soll sein
 109 //  -1.4 = d/dk j(k) = (d/dj k(j))^-1 = - j^2/(d+j)*ln 2, also j^2=2(d+j),
 110 //  grob j=sqrt(2d) und damit k=sqrt(d).]
 111 // Aufwand: asymptotisch d^2.5 .
 112
 113         if (zerop(x))
 114                 return x;
 115         var uintL actuallen = TheLfloat(x)->len;
 116         var uintL d = float_digits(x);
 117         var sintL e = float_exponent(x);
 118         if (e <= (1-(sintL)d)>>1) // e <= (1-d)/2 <==> e <= -ceiling((d-1)/2) ?
 119                 return x; // ja -> x als Ergebnis
 120  {      Mutable(cl_LF,x);
 121         var sintL ee = e;
 122         // Bei e <= -1-limit_slope*floor(sqrt(d)) kann die Potenzreihe
 123         // angewandt werden. Ein guter Wert für naive1 ist limit_slope = 0.6,
 124         // für naive3 aber limit_slope = 0.5.
 125         var sintL e_limit = -1-floor(isqrt(d),2); // -1-floor(sqrt(d))
 126         if (e > e_limit) {
 127                 // e > -1-limit_slope*floor(sqrt(d)) -> muß |x| verkleinern.
 128                 x = scale_float(x,e_limit-e);
 129                 ee = e_limit; // Neuer Exponent = e_limit.
 130         }
 131         var cl_LF x2 = square(x); // x^2
 132         // Potenzreihe anwenden:
 133         var cl_LF powser_value;
 134         var cl_LF a = x2; // a := x^2
 135         var int i = 1;
 136         if (0) {
 137                 // naive1:
 138                 // fixed-point representation
 139                 d = d-ee; // fixed-point representation with d mantissa bits
 140                 var cl_I b = round1(scale_float(x,d)); // b := x
 141                 var cl_I sum = 0; // sum := (float 0 x)
 142                 loop {
 143                         if (b == 0) break;
 144                         sum = sum + b;
 145                         b = round1(round1(The(cl_LF)(b*a)),(cl_I)((i+1)*(i+2)));
 146                         i = i+2;
 147                 }
 148                 powser_value = scale_float(cl_float(sum,x),-d);
 149         } else if (actuallen <= 7) { // Break-even-Point before extendsqrt: N<=6
 150                 // naive2:
 151                 // floating-point representation
 152                 var cl_LF b = x; // b := x
 153                 var cl_LF sum = cl_float(0,x); // sum := (float 0 x)
 154                 loop {
 155                         var cl_LF new_sum = sum + b;
 156                         if (new_sum == sum) // = sum ?
 157                                 break; // ja -> Potenzreihe abbrechen
 158                         sum = new_sum;
 159                         b = (b*a)/(cl_I)((i+1)*(i+2));
 160                         i = i+2;
 161                 }
 162                 powser_value = sum;
 163         } else {
 164                 // naive3:
 165                 // floating-point representation with smooth precision reduction
 166                 var cl_LF b = x; // b := x
 167                 var cl_LF eps = scale_float(b,-(sintL)d-10);
 168                 var cl_LF sum = cl_float(0,x); // sum := (float 0 x)
 169                 loop {
 170                         var cl_LF new_sum = sum + LF_to_LF(b,actuallen);
 171                         if (new_sum == sum) // = sum ?
 172                                 break; // ja -> Potenzreihe abbrechen
 173                         sum = new_sum;
 174                         b = cl_LF_shortenwith(b,eps);
 175                         b = (b*a)/(cl_I)((i+1)*(i+2));
 176                         i = i+2;
 177                 }
 178                 powser_value = sum;
 179         }
 180         var cl_LF z = square(powser_value); // sinh^2 als Ergebnis
 181         while (e > e_limit) {
 182                 z = square(cl_float(1,x) + scale_float(z,1)) - cl_float(1,x); // z := (1+2*z)^2-1
 183                 e_limit++;
 184         }
 185         return z;
 186 }}
 187 // Bit complexity (N = length(x)): O(N^(1/2)*M(N)).
 188
 189 // Timings of the three variants, on an i486 33 MHz, running Linux,
 190 // applied to x = sqrt(2)-1 = 0.414...
 191 //   N     naive1  naive2  naive3  ratseries exp&recip
 192 //    4     0.0055  0.0039  0.0041  0.021     0.0046
 193 //    6     0.0073  0.0054  0.0054  0.029     0.0062
 194 //    8     0.0093  0.0075  0.0070  0.036     0.0081
 195 //   10     0.011   0.010   0.009   0.046     0.0011
 196 //   25     0.041   0.046   0.033   0.133     0.043
 197 //   50     0.14    0.18    0.12    0.36      0.16
 198 //  100     0.56    0.70    0.43    1.12      0.61
 199 //  250     3.5     4.5     2.7     5.3       3.3
 200 //  500    14.9    19.4    11.4    19.0      11.4
 201 // 1000    63      82      47      63        35
 202 // 2500   328     381     243     261       143
 203 // ==> naive2 fastest for N <= 6,
 204 //     naive3 fastest for 6 <= N <= 500,
 205 //     exp&recip (which uses exp's own ratseries) fastest for N >= 500.
 206