src/float/transcendental/cl_LF_coshsinh_aux.cc

   1 // cl_coshsinh_aux().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_lfloat.h"
  13 #include "cl_LF_tran.h"
  14 #include "cl_LF.h"
  15 #include "cl_integer.h"
  16 #include "cl_alloca.h"
  17 #include "cl_abort.h"
  18
  19 #undef floor
  20 #include <math.h>
  21 #define floor cln_floor
  22
  23 // Computing cosh(x) = sqrt(1+sinh(x)^2) instead of computing separately
  24 // by a power series evaluation brings 20% speedup, even more for small lengths.
  25 #define TRIVIAL_SPEEDUP
  26
  27 const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintL lq, uintC len)
  28 {
  29  {      Mutable(cl_I,p);
  30         var uintL lp = integer_length(p); // now |p| < 2^lp.
  31         if (!(lp <= lq)) cl_abort();
  32         lp = lq - lp; // now |p/2^lq| < 2^-lp.
  33         // Minimize lq (saves computation time).
  34         {
  35                 var uintL lp2 = ord2(p);
  36                 if (lp2 > 0) {
  37                         p = p >> lp2;
  38                         lq = lq - lp2;
  39                 }
  40         }
  41         // cosh(p/2^lq):
  42         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
  43         // with appropriate N, and
  44         //   a(n) = 1, b(n) = 1,
  45         //   p(n) = p^2 for n>0,
  46         //   q(n) = (2*n-1)*(2*n)*(2^lq)^2 for n>0.
  47         // sinh(p/2^lq):
  48         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
  49         // with appropriate N, and
  50         //   a(n) = 1, b(n) = 1,
  51         //   p(0) = p, p(n) = p^2 for n>0,
  52         //   q(0) = 2^lq, q(n) = (2*n)*(2*n+1)*(2^lq)^2 for n>0.
  53         var uintC actuallen = len+1; // 1 Schutz-Digit
  54         // How many terms to we need for M bits of precision? N/2 terms suffice,
  55         // provided that
  56         //   1/(2^(N*lp)*N!) < 2^-M
  57         // <==   N*(log(N)-1)+N*lp*log(2) > M*log(2)
  58         // First approximation:
  59         //   N0 = M will suffice, so put N<=N0.
  60         // Second approximation:
  61         //   N1 = floor(M*log(2)/(log(N0)-1+lp*log(2))), slightly too small,
  62         //   so put N>=N1.
  63         // Third approximation:
  64         //   N2 = ceiling(M*log(2)/(log(N1)-1+lp*log(2))), slightly too large.
  65         //   N = N2+2, two more terms for safety.
  66         var uintL N0 = intDsize*actuallen;
  67         var uintL N1 = (uintL)(0.693147*intDsize*actuallen/(log((double)N0)-1.0+0.693148*lp));
  68         var uintL N2 = (uintL)(0.693148*intDsize*actuallen/(log((double)N1)-1.0+0.693147*lp))+1;
  69         var uintL N = N2+2;
  70         N = ceiling(N,2);
  71         CL_ALLOCA_STACK;
  72         var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  73         var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  74         var uintL* qsv = (uintL*) cl_alloca(N*sizeof(uintL));
  75         var uintL n;
  76         var cl_I p2 = square(p);
  77         var cl_LF sinhsum;
  78         {
  79                 init1(cl_I, pv[0]) (p);
  80                 init1(cl_I, qv[0]) ((cl_I)1 << lq);
  81                 for (n = 1; n < N; n++) {
  82                         init1(cl_I, pv[n]) (p2);
  83                         init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n+1)) << (2*lq+1));
  84                 }
  85                 var cl_pq_series series;
  86                 series.pv = pv; series.qv = qv; series.qsv = qsv;
  87                 sinhsum = eval_rational_series(N,series,actuallen);
  88                 for (n = 0; n < N; n++) {
  89                         pv[n].~cl_I();
  90                         qv[n].~cl_I();
  91                 }
  92         }
  93         #if !defined(TRIVIAL_SPEEDUP)
  94         var cl_LF coshsum;
  95         {
  96                 init1(cl_I, pv[0]) (1);
  97                 init1(cl_I, qv[0]) (1);
  98                 for (n = 1; n < N; n++) {
  99                         init1(cl_I, pv[n]) (p2);
 100                         init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n-1)) << (2*lq+1));
 101                 }
 102                 var cl_pq_series series;
 103                 series.pv = pv; series.qv = qv; series.qsv = qsv;
 104                 coshsum = eval_rational_series(N,series,actuallen);
 105                 for (n = 0; n < N; n++) {
 106                         pv[n].~cl_I();
 107                         qv[n].~cl_I();
 108                 }
 109         }
 110         #else // TRIVIAL_SPEEDUP
 111         var cl_LF coshsum = sqrt(cl_I_to_LF(1,actuallen) + square(sinhsum));
 112         #endif
 113         return cl_LF_cosh_sinh_t(shorten(coshsum,len),shorten(sinhsum,len)); // verkürzen und fertig
 114 }}
 115 // Bit complexity (N = len, and if p has length O(log N) and ql = O(log N)):
 116 // O(log(N)*M(N)).
 117