]> www.ginac.de Git - cln.git/blob - src/float/transcendental/cl_LF_catalanconst.cc
Finalize CLN 1.3.7 release.
[cln.git] / src / float / transcendental / cl_LF_catalanconst.cc
1 // catalanconst().
2
3 // General includes.
4 #include "base/cl_sysdep.h"
5
6 // Specification.
7 #include "float/transcendental/cl_F_tran.h"
8
9
10 // Implementation.
11
12 #include "cln/lfloat.h"
13 #include "float/transcendental/cl_LF_tran.h"
14 #include "float/lfloat/cl_LF.h"
15 #include "cln/integer.h"
16 #include "base/cl_alloca.h"
17
18 namespace cln {
19
20 const cl_LF compute_catalanconst_ramanujan (uintC len)
21 {
22         // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
23         //  Wiley 1987. Section 11.3, exercise 16 g, p. 386]
24         // G = 3/8 * sum(n=0..infty, n!^2 / (2n+1)!*(2n+1))
25         //     + pi/8 * log(2+sqrt(3)).
26         // Every summand gives 0.6 new decimal digits in precision.
27         // The sum is best evaluated using fixed-point arithmetic,
28         // so that the precision is reduced for the later summands.
29         var uintC actuallen = len + 2; // 2 guard digits
30         var sintC scale = intDsize*actuallen;
31         var cl_I sum = 0;
32         var cl_I n = 0;
33         var cl_I factor = ash(1,scale);
34         while (!zerop(factor)) {
35                 sum = sum + truncate1(factor,2*n+1);
36                 n = n+1;
37                 factor = truncate1(factor*n,2*(2*n+1));
38         }
39         var cl_LF fsum = scale_float(cl_I_to_LF(sum,actuallen),-scale);
40         var cl_LF g =
41           scale_float(The(cl_LF)(3*fsum)
42                       + The(cl_LF)(pi(actuallen))
43                         * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))),
44                       -3);
45         return shorten(g,len); // verkürzen und fertig
46 }
47 // Bit complexity (N := len): O(N^2).
48
49 const cl_LF compute_catalanconst_ramanujan_fast (uintC len)
50 {
51         // Same formula as above, using a binary splitting evaluation.
52         // See [Borwein, Borwein, section 10.2.3].
53         struct rational_series_stream : cl_pqb_series_stream {
54                 cl_I n;
55                 static cl_pqb_series_term computenext (cl_pqb_series_stream& thisss)
56                 {
57                         var rational_series_stream& thiss = (rational_series_stream&)thisss;
58                         var cl_I n = thiss.n;
59                         var cl_pqb_series_term result;
60                         if (n==0) {
61                                 result.p = 1;
62                                 result.q = 1;
63                                 result.b = 1;
64                         } else {
65                                 result.p = n;
66                                 result.b = 2*n+1;
67                                 result.q = result.b << 1; // 2*(2*n+1)
68                         }
69                         thiss.n = n+1;
70                         return result;
71                 }
72                 rational_series_stream ()
73                         : cl_pqb_series_stream (rational_series_stream::computenext),
74                           n (0) {}
75         } series;
76         var uintC actuallen = len + 2; // 2 guard digits
77         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
78         // with appropriate N, and
79         //   a(n) = 1, b(n) = 2*n+1,
80         //   p(n) = n for n>0, q(n) = 2*(2*n+1) for n>0.
81         var uintC N = (intDsize/2)*actuallen;
82         // 4^-N <= 2^(-intDsize*actuallen).
83         var cl_LF fsum = eval_rational_series<false>(N,series,actuallen,actuallen);
84         var cl_LF g =
85           scale_float(The(cl_LF)(3*fsum)
86                       + The(cl_LF)(pi(actuallen))
87                         * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))),
88                       -3);
89         return shorten(g,len); // verkürzen und fertig
90 }
91 // Bit complexity (N := len): O(log(N)^2*M(N)).
92
93 const cl_LF compute_catalanconst_expintegral1 (uintC len)
94 {
95         // We compute f(x) classically and g(x) using the partial sums of f(x).
96         var uintC actuallen = len+2; // 2 guard digits
97         var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
98         var uintC N = (uintC)(2.718281828*x);
99         var cl_LF fterm = cl_I_to_LF(1,actuallen);
100         var cl_LF fsum = fterm;
101         var cl_LF gterm = fterm;
102         var cl_LF gsum = gterm;
103         var uintC n;
104         // After n loops
105         //   fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!,
106         //   gterm = S_n*x^n/n!, gsum = S_0*x^0/0! + ... + S_n*x^n/n!.
107         for (n = 1; n < N; n++) {
108                 fterm = The(cl_LF)(fterm*x)/n;
109                 fsum = fsum + fterm;
110                 gterm = The(cl_LF)(gterm*x)/n;
111                 if (evenp(n))
112                         gterm = gterm + fterm/square((cl_I)(2*n+1));
113                 else
114                         gterm = gterm - fterm/square((cl_I)(2*n+1));
115                 gsum = gsum + gterm;
116         }
117         var cl_LF result = gsum/fsum;
118         return shorten(result,len); // verkürzen und fertig
119 }
120 // Bit complexity (N = len): O(N^2).
121
122 // Same algorithm as expintegral1, but using binary splitting to evaluate
123 // the sums.
124 const cl_LF compute_catalanconst_expintegral2 (uintC len)
125 {
126         var uintC actuallen = len+2; // 2 guard digits
127         var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
128         var uintC N = (uintC)(2.718281828*x);
129         CL_ALLOCA_STACK;
130         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
131         var uintC n;
132         for (n = 0; n < N; n++) {
133                 if (n==0) {
134                         init1(cl_I, args[n].p) (1);
135                         init1(cl_I, args[n].q) (1);
136                 } else {
137                         init1(cl_I, args[n].p) (x);
138                         init1(cl_I, args[n].q) (n);
139                 }
140                 init1(cl_I, args[n].d) (evenp(n)
141                                         ? square((cl_I)(2*n+1))
142                                         : -square((cl_I)(2*n+1)));
143         }
144         var cl_LF result = eval_pqd_series(N,args,actuallen);
145         for (n = 0; n < N; n++) {
146                 args[n].p.~cl_I();
147                 args[n].q.~cl_I();
148                 args[n].d.~cl_I();
149         }
150         return shorten(result,len); // verkürzen und fertig
151 }
152 // Bit complexity (N = len): O(log(N)^2*M(N)).
153
154 // Using Cohen-Villegas-Zagier acceleration, but without binary splitting.
155 const cl_LF compute_catalanconst_cvz1 (uintC len)
156 {
157         var uintC actuallen = len+2; // 2 guard digits
158         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
159 #if 0
160         var cl_LF fterm = cl_I_to_LF(2*(cl_I)N*(cl_I)N,actuallen);
161         var cl_LF fsum = fterm;
162         var cl_LF gterm = fterm;
163         var cl_LF gsum = gterm;
164         var uintC n;
165         // After n loops
166         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
167         //   gterm = S_n*fterm, gsum = ... + gterm.
168         for (n = 1; n < N; n++) {
169                 fterm = The(cl_LF)(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
170                 fsum = fsum + fterm;
171                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
172                 if (evenp(n))
173                         gterm = gterm + fterm/square((cl_I)(2*n+1));
174                 else
175                         gterm = gterm - fterm/square((cl_I)(2*n+1));
176                 gsum = gsum + gterm;
177         }
178         var cl_LF result = gsum/(cl_I_to_LF(1,actuallen)+fsum);
179 #else
180         // Take advantage of the fact that fterm and fsum are integers.
181         var cl_I fterm = 2*(cl_I)N*(cl_I)N;
182         var cl_I fsum = fterm;
183         var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
184         var cl_LF gsum = gterm;
185         var uintC n;
186         // After n loops
187         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
188         //   gterm = S_n*fterm, gsum = ... + gterm.
189         for (n = 1; n < N; n++) {
190                 fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
191                 fsum = fsum + fterm;
192                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
193                 if (evenp(n))
194                         gterm = gterm + cl_I_to_LF(fterm,actuallen)/square((cl_I)(2*n+1));
195                 else
196                         gterm = gterm - cl_I_to_LF(fterm,actuallen)/square((cl_I)(2*n+1));
197                 gsum = gsum + gterm;
198         }
199         var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
200 #endif
201         return shorten(result,len); // verkürzen und fertig
202 }
203 // Bit complexity (N = len): O(N^2).
204
205 // Using Cohen-Villegas-Zagier acceleration, with binary splitting.
206 const cl_LF compute_catalanconst_cvz2 (uintC len)
207 {
208         var uintC actuallen = len+2; // 2 guard digits
209         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
210         CL_ALLOCA_STACK;
211         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
212         var uintC n;
213         for (n = 0; n < N; n++) {
214                 init1(cl_I, args[n].p) (2*(cl_I)(N-n)*(cl_I)(N+n));
215                 init1(cl_I, args[n].q) ((cl_I)(2*n+1)*(cl_I)(n+1));
216                 init1(cl_I, args[n].d) (evenp(n)
217                                         ? square((cl_I)(2*n+1))
218                                         : -square((cl_I)(2*n+1)));
219         }
220         var cl_pqd_series_result<cl_I> sums;
221         eval_pqd_series_aux(N,args,sums);
222         // Here we need U/(1+S) = V/D(Q+T).
223         var cl_LF result =
224           cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
225         for (n = 0; n < N; n++) {
226                 args[n].p.~cl_I();
227                 args[n].q.~cl_I();
228                 args[n].d.~cl_I();
229         }
230         return shorten(result,len); // verkürzen und fertig
231 }
232 // Bit complexity (N = len): O(log(N)^2*M(N)).
233
234
235 const cl_LF compute_catalanconst_lupas (uintC len)
236 {
237         // [Alexandru Lupas. Formulae for Some Classical Constants.
238         //  Proceedings of ROGER-2000.
239         //  http://www.lacim.uqam.ca/~plouffe/articles/alupas1.pdf]
240         // [http://mathworld.wolfram.com/CatalansConstant.html]
241         // G = 19/18 * sum(n=0..infty,
242         //                 mul(m=1..n, -32*((80*m^3+72*m^2-18*m-19)*m^3)/
243         //                             (10240*m^6+14336*m^5+2560*m^4-3072*m^3-888*m^2+72*m+27))).
244         struct rational_series_stream : cl_pq_series_stream {
245                 cl_I n;
246                 static cl_pq_series_term computenext (cl_pq_series_stream& thisss)
247                 {
248                         var rational_series_stream& thiss = (rational_series_stream&)thisss;
249                         var cl_I n = thiss.n;
250                         var cl_pq_series_term result;
251                         if (zerop(n)) {
252                                 result.p = 1;
253                                 result.q = 1;
254                         } else {
255                                 // Compute -32*((80*n^3+72*n^2-18*n-19)*n^3) (using Horner scheme):
256                                 result.p = (19+(18+(-72-80*n)*n)*n)*n*n*n << 5;
257                                 // Compute 10240*n^6+14336*n^5+2560*n^4-3072*n^3-888*n^2+72*n+27:
258                                 result.q = 27+(72+(-888+(-3072+(2560+(14336+10240*n)*n)*n)*n)*n)*n;
259                         }
260                         thiss.n = plus1(n);
261                         return result;
262                 }
263                 rational_series_stream ()
264                         : cl_pq_series_stream (rational_series_stream::computenext),
265                           n (0) {}
266         } series;
267         var uintC actuallen = len + 2; // 2 guard digits
268         var uintC N = (intDsize/2)*actuallen;
269         var cl_LF fsum = eval_rational_series<false>(N,series,actuallen,actuallen);
270         var cl_LF g = fsum*cl_I_to_LF(19,actuallen)/cl_I_to_LF(18,actuallen);
271         return shorten(g,len);
272 }
273 // Bit complexity (N = len): O(log(N)^2*M(N)).
274
275 // Timings of the above algorithms, on an AMD Opteron 1.7 GHz, running Linux/x86.
276 //    N      ram    ramfast  exp1    exp2    cvz1    cvz2    lupas
277 //    25     0.0011  0.0010  0.0094  0.0107  0.0021  0.0016  0.0034
278 //    50     0.0030  0.0025  0.0280  0.0317  0.0058  0.0045  0.0095
279 //   100     0.0087  0.0067  0.0854  0.0941  0.0176  0.0121  0.0260
280 //   250     0.043   0.029   0.462   0.393   0.088   0.055   0.109
281 //   500     0.15    0.086   1.7     1.156   0.323   0.162   0.315
282 //  1000     0.57    0.25    7.5     3.23    1.27    0.487   0.864
283 //  2500     3.24    1.10   52.2    12.4     8.04    2.02    3.33
284 //  5000    13.1     3.06  218      32.7    42.1     5.59    8.80
285 // 10000    52.7     8.2   910      85.3   216.7    15.3    22.7
286 // 25000   342      29.7  6403     295    1643      54.5    77.3
287 // 50000            89.9                           139     195
288 //100000           227                             363     483
289 // asymp.    N^2     FAST    N^2     FAST    N^2     FAST    FAST
290
291 // (FAST means O(log(N)^2*M(N)))
292 //
293 // The "ramfast" algorithm is always fastest.
294
295 const cl_LF compute_catalanconst (uintC len)
296 {
297         return compute_catalanconst_ramanujan_fast(len);
298 }
299 // Bit complexity (N := len): O(log(N)^2*M(N)).
300
301 const cl_LF catalanconst (uintC len)
302 {
303         var uintC oldlen = TheLfloat(cl_LF_catalanconst())->len; // vorhandene Länge
304         if (len < oldlen)
305                 return shorten(cl_LF_catalanconst(),len);
306         if (len == oldlen)
307                 return cl_LF_catalanconst();
308
309         // TheLfloat(cl_LF_catalanconst())->len um mindestens einen konstanten Faktor
310         // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird:
311         var uintC newlen = len;
312         oldlen += floor(oldlen,2); // oldlen * 3/2
313         if (newlen < oldlen)
314                 newlen = oldlen;
315
316         // gewünschte > vorhandene Länge -> muß nachberechnen:
317         cl_LF_catalanconst() = compute_catalanconst(newlen);
318         return (len < newlen ? shorten(cl_LF_catalanconst(),len) : cl_LF_catalanconst());
319 }
320
321 }  // namespace cln