12 #include "cl_lfloat.h"
13 #include "cl_LF_tran.h"
15 #include "cl_integer.h"
16 #include "cl_alloca.h"
21 #define floor cln_floor
23 // Computing cosh(x) = sqrt(1+sinh(x)^2) instead of computing separately
24 // by a power series evaluation brings 20% speedup, even more for small lengths.
25 #define TRIVIAL_SPEEDUP
27 const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintL lq, uintC len)
30 var uintL lp = integer_length(p); // now |p| < 2^lp.
31 if (!(lp <= lq)) cl_abort();
32 lp = lq - lp; // now |p/2^lq| < 2^-lp.
33 // Minimize lq (saves computation time).
35 var uintL lp2 = ord2(p);
42 // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
43 // with appropriate N, and
44 // a(n) = 1, b(n) = 1,
45 // p(n) = p^2 for n>0,
46 // q(n) = (2*n-1)*(2*n)*(2^lq)^2 for n>0.
48 // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
49 // with appropriate N, and
50 // a(n) = 1, b(n) = 1,
51 // p(0) = p, p(n) = p^2 for n>0,
52 // q(0) = 2^lq, q(n) = (2*n)*(2*n+1)*(2^lq)^2 for n>0.
53 var uintC actuallen = len+1; // 1 Schutz-Digit
54 // How many terms to we need for M bits of precision? N/2 terms suffice,
56 // 1/(2^(N*lp)*N!) < 2^-M
57 // <== N*(log(N)-1)+N*lp*log(2) > M*log(2)
58 // First approximation:
59 // N0 = M will suffice, so put N<=N0.
60 // Second approximation:
61 // N1 = floor(M*log(2)/(log(N0)-1+lp*log(2))), slightly too small,
63 // Third approximation:
64 // N2 = ceiling(M*log(2)/(log(N1)-1+lp*log(2))), slightly too large.
65 // N = N2+2, two more terms for safety.
66 var uintL N0 = intDsize*actuallen;
67 var uintL N1 = (uintL)(0.693147*intDsize*actuallen/(log((double)N0)-1.0+0.693148*lp));
68 var uintL N2 = (uintL)(0.693148*intDsize*actuallen/(log((double)N1)-1.0+0.693147*lp))+1;
72 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
73 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
74 var uintL* qsv = (uintL*) cl_alloca(N*sizeof(uintL));
76 var cl_I p2 = square(p);
79 init1(cl_I, pv[0]) (p);
80 init1(cl_I, qv[0]) ((cl_I)1 << lq);
81 for (n = 1; n < N; n++) {
82 init1(cl_I, pv[n]) (p2);
83 init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n+1)) << (2*lq+1));
85 var cl_pq_series series;
86 series.pv = pv; series.qv = qv; series.qsv = qsv;
87 sinhsum = eval_rational_series(N,series,actuallen);
88 for (n = 0; n < N; n++) {
93 #if !defined(TRIVIAL_SPEEDUP)
96 init1(cl_I, pv[0]) (1);
97 init1(cl_I, qv[0]) (1);
98 for (n = 1; n < N; n++) {
99 init1(cl_I, pv[n]) (p2);
100 init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n-1)) << (2*lq+1));
102 var cl_pq_series series;
103 series.pv = pv; series.qv = qv; series.qsv = qsv;
104 coshsum = eval_rational_series(N,series,actuallen);
105 for (n = 0; n < N; n++) {
110 #else // TRIVIAL_SPEEDUP
111 var cl_LF coshsum = sqrt(cl_I_to_LF(1,actuallen) + square(sinhsum));
113 return cl_LF_cosh_sinh_t(shorten(coshsum,len),shorten(sinhsum,len)); // verkürzen und fertig
115 // Bit complexity (N = len, and if p has length O(log N) and ql = O(log N)):