src/float/transcendental/cl_LF_cossin_aux.cc

   1 // cl_cossin_aux().
   2
   3 // General includes.
   4 #include "base/cl_sysdep.h"
   5
   6 // Specification.
   7 #include "float/transcendental/cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/lfloat.h"
  13 #include "float/transcendental/cl_LF_tran.h"
  14 #include "float/lfloat/cl_LF.h"
  15 #include "cln/integer.h"
  16 #include "base/cl_alloca.h"
  17 #include "cln/exception.h"
  18
  19 #undef floor
  20 #include <cmath>
  21 #define floor cln_floor
  22
  23 namespace cln {
  24
  25 // Computing cos(x) = sqrt(1-sin(x)^2) instead of computing separately
  26 // by a power series evaluation brings 20% speedup, even more for small lengths.
  27 #define TRIVIAL_SPEEDUP
  28
  29 const cl_LF_cos_sin_t cl_cossin_aux (const cl_I& p, uintE lq, uintC len)
  30 {
  31  {      Mutable(cl_I,p);
  32         var uintE lp = integer_length(p); // now |p| < 2^lp.
  33         if (!(lp <= lq)) throw runtime_exception();
  34         lp = lq - lp; // now |p/2^lq| < 2^-lp.
  35         // Minimize lq (saves computation time).
  36         {
  37                 var uintC lp2 = ord2(p);
  38                 if (lp2 > 0) {
  39                         p = p >> lp2;
  40                         lq = lq - lp2;
  41                 }
  42         }
  43         // cos(p/2^lq):
  44         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
  45         // with appropriate N, and
  46         //   a(n) = 1, b(n) = 1,
  47         //   p(n) = -p^2 for n>0,
  48         //   q(n) = (2*n-1)*(2*n)*(2^lq)^2 for n>0.
  49         // sin(p/2^lq):
  50         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
  51         // with appropriate N, and
  52         //   a(n) = 1, b(n) = 1,
  53         //   p(0) = p, p(n) = -p^2 for n>0,
  54         //   q(0) = 2^lq, q(n) = (2*n)*(2*n+1)*(2^lq)^2 for n>0.
  55         var uintC actuallen = len+1; // 1 guard digit
  56         // How many terms do we need for M bits of precision? N/2 terms suffice,
  57         // provided that
  58         //   1/(2^(N*lp)*N!) < 2^-M
  59         // <==   N*(log(N)-1)+N*lp*log(2) > M*log(2)
  60         // First approximation:
  61         //   N0 = M will suffice, so put N<=N0.
  62         // Second approximation:
  63         //   N1 = floor(M*log(2)/(log(N0)-1+lp*log(2))), slightly too small,
  64         //   so put N>=N1.
  65         // Third approximation:
  66         //   N2 = ceiling(M*log(2)/(log(N1)-1+lp*log(2))), slightly too large.
  67         //   N = N2+2, two more terms for safety.
  68         var uintC N0 = intDsize*actuallen;
  69         var uintC N1 = (uintC)(0.693147*intDsize*actuallen/(::log((double)N0)-1.0+0.693148*lp));
  70         var uintC N2 = (uintC)(0.693148*intDsize*actuallen/(::log((double)N1)-1.0+0.693147*lp))+1;
  71         var uintC N = N2+2;
  72         N = ceiling(N,2);
  73         CL_ALLOCA_STACK;
  74         var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  75         var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  76         var uintC n;
  77         var cl_I p2 = -square(p);
  78         var cl_LF sinsum;
  79         {
  80                 init1(cl_I, pv[0]) (p);
  81                 init1(cl_I, qv[0]) ((cl_I)1 << lq);
  82                 for (n = 1; n < N; n++) {
  83                         init1(cl_I, pv[n]) (p2);
  84                         init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n+1)) << (2*lq+1));
  85                 }
  86                 var cl_pq_series series;
  87                 series.pv = pv; series.qv = qv;
  88                 sinsum = eval_rational_series<true>(N,series,actuallen);
  89                 for (n = 0; n < N; n++) {
  90                         pv[n].~cl_I();
  91                         qv[n].~cl_I();
  92                 }
  93         }
  94         #if !defined(TRIVIAL_SPEEDUP)
  95         var cl_LF cossum;
  96         {
  97                 init1(cl_I, pv[0]) (1);
  98                 init1(cl_I, qv[0]) (1);
  99                 for (n = 1; n < N; n++) {
 100                         init1(cl_I, pv[n]) (p2);
 101                         init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n-1)) << (2*lq+1));
 102                 }
 103                 var cl_pq_series series;
 104                 series.pv = pv; series.qv = qv;
 105                 cossum = eval_rational_series<true>(N,series,actuallen);
 106                 for (n = 0; n < N; n++) {
 107                         pv[n].~cl_I();
 108                         qv[n].~cl_I();
 109                 }
 110         }
 111         #else // TRIVIAL_SPEEDUP
 112         var cl_LF cossum = sqrt(cl_I_to_LF(1,actuallen) - square(sinsum));
 113         #endif
 114         return cl_LF_cos_sin_t(shorten(cossum,len),shorten(sinsum,len)); // verkürzen und fertig
 115 }}
 116 // Bit complexity (N = len, and if p has length O(log N) and ql = O(log N)):
 117 // O(log(N)*M(N)).
 118
 119 }  // namespace cln