12 #include "cln/lfloat.h"
13 #include "cl_LF_tran.h"
15 #include "cln/integer.h"
17 #include "cl_alloca.h"
21 #if 0 // works, but besselintegral4 is always faster
23 const cl_LF compute_eulerconst_expintegral (uintC len)
25 // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
26 // Wiley 1987. Section 10.2.3, exercise 11, p. 336]
27 // Use the following formula for the modified exponential integral
28 // (valid for Re(z) > 0)
29 // E1(z) := integral(t = z..+infty, exp(-t)/t dt)
30 // E1(z) = - log z - C + sum(n=1..infty, (-1)^(n-1) z^n / (n*n!))
31 // [Hint for proving this formula:
32 // 1. Learn about the elementary properties of the Gamma function.
33 // 2. -C = derivative of Gamma at 1
34 // = lim_{z -> 0} (Gamma(z) - 1/z)
35 // = integral(t = 0..1, (exp(-t)-1)/t dt)
36 // + integral(t = 1..infty, exp(-t)/t dt)
38 // 0 = integral(t=0..1, 1/(t+1)) - integral(t=1..infty, 1/t(t+1) dt)
40 // -C = integral(t = 0..infty, (exp(-t)/t - 1/t(t+1)) dt)
41 // 4. Compute E1(z) + C and note that E1(z) + C + log z is the integral
42 // of an entire function, hence an entire function as well.]
43 // Of course we also have the estimate
44 // |E1(z)| < exp(-Re(z)).
45 // This means that we can get C by computing
46 // sum(n=1..infty, (-1)^(n-1) z^n / (n*n!)) - log z
48 // In order to get M bits of precision, we first choose z (real) such
49 // that exp(-z) < 2^-M. This will make |E1(z)| small enough. z should
50 // be chosen as an integer, this is the key to computing the series
51 // sum very fast. z = M*log(2) + O(1).
52 // Then we choose the number N of terms:
53 // Note than the n-th term's absolute value is (logarithmically)
54 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) + o(1).
55 // The derivative of this with respect to n is
56 // log(z) - log(n) - 3/(2n) + o(1/n),
57 // hence is increasing for n < z and decreasing for n > z.
58 // The maximum value is attained at n = z + O(1), and is z + O(log z),
59 // which means that we need z/log(2) + O(log z) bits before the
61 // We can cut off the series when
62 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) < -M*log(2)
63 // This happens at n = alpha*z - 3/(2*log(alpha))*log(z) + O(1),
64 // where alpha = 3.591121477... is the solution of
65 // -alpha*log(alpha) + alpha + 1 = 0.
66 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
67 // compute this number.]
68 // Finally we compute the series's sum as
69 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
70 // with a(n) = 1, b(n) = n+1, p(n) = z for n=0, -z for n>0, q(n) = n+1.
71 // If we computed this with floating-point numbers, we would have
72 // to more than double the floating-point precision because of the large
73 // extinction which takes place. But luckily we compute with integers.
74 var uintC actuallen = len+1; // 1 Schutz-Digit
75 var uintC z = (uintC)(0.693148*intDsize*actuallen)+1;
76 var uintC N = (uintC)(3.591121477*z);
78 var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
79 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
80 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
82 for (n = 0; n < N; n++) {
83 init1(cl_I, bv[n]) (n+1);
84 init1(cl_I, pv[n]) (n==0 ? (cl_I)z : -(cl_I)z);
85 init1(cl_I, qv[n]) (n+1);
87 var cl_pqb_series series;
89 series.pv = pv; series.qv = qv; series.qsv = NULL;
90 var cl_LF fsum = eval_rational_series(N,series,actuallen);
91 for (n = 0; n < N; n++) {
96 fsum = fsum - ln(cl_I_to_LF(z,actuallen)); // log(z) subtrahieren
97 return shorten(fsum,len); // verkürzen und fertig
99 // Bit complexity (N = len): O(log(N)^2*M(N)).
103 #if 0 // works, but besselintegral1 is twice as fast
105 const cl_LF compute_eulerconst_expintegral1 (uintC len)
107 // Define f(z) := sum(n=0..infty, z^n/n!) = exp(z)
108 // and g(z) := sum(n=0..infty, H_n*z^n/n!)
109 // where H_n := 1/1 + 1/2 + ... + 1/n.
110 // The following formula can be proved:
111 // g'(z) - g(z) = (exp(z)-1)/z,
112 // g(z) = exp(z)*(log(z) + c3 + integral(t=..z, exp(-t)/t dt))
113 // The Laplace method for determining the asymptotics of an integral
114 // or sum yields for real x>0 (the terms n = x+O(x^(1/2)) are
116 // f(x) = exp(x)*(1 + O(x^(-1/2)))
117 // g(x) = exp(x)*(log(x) + C + O(log(x)*x^(-1/2)))
119 // g(x)/f(x) - log(x) - C = O(log(x)*x^(-1/2))
120 // This determines the constant c3, we thus have
121 // g(z) = exp(z)*(log(z) + C + integral(t=z..infty, exp(-t)/t dt))
122 // Hence we have for x -> infty:
123 // g(x)/f(x) - log(x) - C == O(exp(-x))
124 // This means that we can get C by computing
125 // g(x)/f(x) - log(x)
127 // In order to get M bits of precision, we first choose x (real) such
128 // that exp(-x) < 2^-M. This will make the absolute value of the
129 // integral small enough. x should be chosen as an integer, this is
130 // the key to computing the series sum very fast. x = M*log(2) + O(1).
131 // Then we choose the number N of terms:
132 // Note than the n-th term's absolute value is (logarithmically)
133 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) + o(1).
134 // The derivative of this with respect to n is
135 // log(x) - log(n) - 1/2n + o(1/n),
136 // hence is increasing for n < x and decreasing for n > x.
137 // The maximum value is attained at n = x + O(1), and is
138 // x + O(log x), which means that we need x/log(2) + O(log x)
139 // bits before the decimal point. This also follows from the
140 // asymptotic estimate for f(x).
141 // We can cut off the series when the relative error is < 2^-M,
142 // i.e. when the absolute error is < 2^-M*exp(x), i.e.
143 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) <
145 // This happens at n = e*x - 1/2*log(x) + O(1).
146 // Finally we compute the sums of the series f(x) and g(x) with N terms
148 // We compute f(x) classically and g(x) using the partial sums of f(x).
149 var uintC actuallen = len+2; // 2 Schutz-Digits
150 var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
151 var uintC N = (uintC)(2.718281828*x);
152 var cl_LF one = cl_I_to_LF(1,actuallen);
153 var cl_LF fterm = one;
154 var cl_LF fsum = fterm;
155 var cl_LF gterm = cl_I_to_LF(0,actuallen);
156 var cl_LF gsum = gterm;
159 // fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!,
160 // gterm = H_n*x^n/n!, gsum = H_1*x/1! + ... + H_n*x^n/n!.
161 for (n = 1; n < N; n++) {
162 fterm = The(cl_LF)(fterm*x)/n;
163 gterm = (The(cl_LF)(gterm*x) + fterm)/n;
164 if (len < 10 || n <= x) {
168 // For n > x, the terms are decreasing.
169 // So we can reduce the precision accordingly.
170 fterm = cl_LF_shortenwith(fterm,one);
171 gterm = cl_LF_shortenwith(gterm,one);
172 fsum = fsum + LF_to_LF(fterm,actuallen);
173 gsum = gsum + LF_to_LF(gterm,actuallen);
176 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(x,actuallen));
177 return shorten(result,len); // verkürzen und fertig
179 // Bit complexity (N = len): O(N^2).
183 #if 0 // works, but besselintegral4 is always faster
185 // Same algorithm as expintegral1, but using binary splitting to evaluate
187 const cl_LF compute_eulerconst_expintegral2 (uintC len)
189 var uintC actuallen = len+2; // 2 Schutz-Digits
190 var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
191 var uintC N = (uintC)(2.718281828*x);
193 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
195 for (n = 0; n < N; n++) {
196 init1(cl_I, args[n].p) (x);
197 init1(cl_I, args[n].q) (n+1);
198 init1(cl_I, args[n].d) (n+1);
200 var cl_pqd_series_result sums;
201 eval_pqd_series_aux(N,args,sums);
202 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
203 // and then dividing them, to compute gsum/fsum, we save two
204 // divisions by computing V/(D*(Q+T)).
206 cl_I_to_LF(sums.V,actuallen)
207 / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen))
208 - ln(cl_I_to_LF(x,actuallen));
209 for (n = 0; n < N; n++) {
214 return shorten(result,len); // verkürzen und fertig
216 // Bit complexity (N = len): O(log(N)^2*M(N)).
220 // cl_LF compute_eulerconst_besselintegral (uintC len)
221 // This is basically the algorithm used in Pari.
222 // Define f(z) := sum(n=0..infty, z^n/n!^2)
223 // and g(z) := sum(n=0..infty, H_n*z^n/n!^2)
224 // where H_n := 1/1 + 1/2 + ... + 1/n.
225 // [f(z) and g(z) are intimately related to the Bessel functions:
226 // f(x^2) = I_0(2*x), g(x^2) = K_0(2*x) + I_0(2*x*)*(C + log(x)).]
227 // The following formulas can be proved:
228 // z f''(z) + f'(z) - f(z) = 0,
229 // z g''(z) + g'(z) - g(z) = f'(z),
230 // g(z) = (log(z)/2 + c3 - 1/2 integral(t=..z, 1/(t f(t)^2) dt)) f(z)
231 // The Laplace method for determining the asymptotics of an integral
232 // or sum yields for real x>0 (the terms n = sqrt(x)+O(x^(1/4)) are
234 // f(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*(1 + O(x^(-1/4)))
235 // g(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*
236 // (1/2*log(x) + C + O(log(x)*x^(-1/4)))
238 // g(x)/f(x) - 1/2*log(x) - C = O(log(x)*x^(-1/4))
239 // This determines the constant c3, we thus have
240 // g(z)= (log(z)/2 + C + 1/2 integral(t=z..infty, 1/(t f(t)^2) dt)) f(z)
241 // Hence we have for x -> infty:
242 // g(x)/f(x) - 1/2*log(x) - C == pi*exp(-4*sqrt(x)) [approx.]
243 // This means that we can get C by computing
244 // g(x)/f(x) - 1/2*log(x)
246 // In order to get M bits of precision, we first choose x (real) such
247 // that exp(-4*sqrt(x)) < 2^-(M+2). This will make the absolute value
248 // of the integral small enough. x should be chosen as an integer,
249 // this is the key to computing the series sum very fast. sqrt(x)
250 // need not be an integer. Set sx = sqrt(x).
251 // sx = M*log(2)/4 + O(1).
252 // Then we choose the number N of terms:
253 // Note than the n-th term's absolute value is (logarithmically)
254 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) + o(1).
255 // The derivative of this with respect to n is
256 // log(x) - 2*log(n) - 1/n + o(1/n),
257 // hence is increasing for n < sx and decreasing for n > sx.
258 // The maximum value is attained at n = sx + O(1), and is
259 // 2*sx + O(log x), which means that we need 2*sx/log(2) + O(log x)
260 // bits before the decimal point. This also follows from the
261 // asymptotic estimate for f(x).
262 // We can cut off the series when the relative error is < 2^-M,
263 // i.e. when the absolute error is
264 // < 2^-M*exp(2*sx)*sx^(-1/2)*1/(2*sqrt(pi)),
266 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) <
267 // < -M*log(2) + 2*sx - 1/2*log(sx) - log(2 sqrt(pi))
268 // This happens at n = alpha*sx - 1/(4*log(alpha))*log(sx) + O(1),
269 // where alpha = 3.591121477... is the solution of
270 // -alpha*log(alpha) + alpha + 1 = 0.
271 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
272 // compute this number.]
273 // Finally we compute the sums of the series f(x) and g(x) with N terms
276 const cl_LF compute_eulerconst_besselintegral1 (uintC len)
278 // We compute f(x) classically and g(x) using the partial sums of f(x).
279 var uintC actuallen = len+1; // 1 Schutz-Digit
280 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
281 var uintC N = (uintC)(3.591121477*sx);
282 var cl_I x = square((cl_I)sx);
283 var cl_LF eps = scale_float(cl_I_to_LF(1,LF_minlen),-(sintC)(sx*2.88539+10));
284 var cl_LF fterm = cl_I_to_LF(1,actuallen);
285 var cl_LF fsum = fterm;
286 var cl_LF gterm = cl_I_to_LF(0,actuallen);
287 var cl_LF gsum = gterm;
290 // fterm = x^n/n!^2, fsum = 1 + x/1!^2 + ... + x^n/n!^2,
291 // gterm = H_n*x^n/n!^2, gsum = H_1*x/1!^2 + ... + H_n*x^n/n!^2.
292 for (n = 1; n < N; n++) {
293 fterm = The(cl_LF)(fterm*x)/square((cl_I)n);
294 gterm = (The(cl_LF)(gterm*x)/(cl_I)n + fterm)/(cl_I)n;
295 if (len < 10 || n <= sx) {
299 // For n > sx, the terms are decreasing.
300 // So we can reduce the precision accordingly.
301 fterm = cl_LF_shortenwith(fterm,eps);
302 gterm = cl_LF_shortenwith(gterm,eps);
303 fsum = fsum + LF_to_LF(fterm,actuallen);
304 gsum = gsum + LF_to_LF(gterm,actuallen);
307 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
308 return shorten(result,len); // verkürzen und fertig
310 // Bit complexity (N = len): O(N^2).
312 #if 0 // works, but besselintegral1 is faster
314 const cl_LF compute_eulerconst_besselintegral2 (uintC len)
316 // We compute the sum of the series f(x) as
317 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
318 // with a(n) = 1, b(n) = 1, p(n) = x for n>0, q(n) = n^2 for n>0.
319 // and the sum of the series g(x) as
320 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
322 // a(n) = HN_{n+1}, b(n) = 1, p(n) = x, q(n) = (n+1)^2 * HD_{n+1}/HD_{n}
323 // where HD_n := lcm(1,...,n) and HN_n := HD_n * H_n. (Note that
324 // HD_n need not be the lowest possible denominator of H_n. For
325 // example, n=6: H_6 = 49/20, but HD_6 = 60.)
326 // WARNING: The memory used by this algorithm grown quadratically in N.
327 // (Because HD_n grows like exp(n), hence HN_n grows like exp(n) as
328 // well, and we store all HN_n values in an array!)
329 var uintC actuallen = len+1; // 1 Schutz-Digit
330 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
331 var uintC N = (uintC)(3.591121477*sx);
332 var cl_I x = square((cl_I)sx);
334 var cl_I* av = (cl_I*) cl_alloca(N*sizeof(cl_I));
335 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
336 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
339 init1(cl_I, pv[0]) (1);
340 init1(cl_I, qv[0]) (1);
341 for (n = 1; n < N; n++) {
342 init1(cl_I, pv[n]) (x);
343 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
345 var cl_pq_series fseries;
346 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
347 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
348 for (n = 0; n < N; n++) {
355 for (n = 0; n < N; n++) {
357 var cl_I Hu = gcd(HD,n+1);
358 var cl_I Hv = exquopos(n+1,Hu);
359 HN = HN*Hv + exquopos(HD,Hu);
361 // Now HN/HD = H_{n+1}.
362 init1(cl_I, av[n]) (HN);
363 init1(cl_I, pv[n]) (x);
364 init1(cl_I, qv[n]) (Hv*(cl_I)(n+1)*(cl_I)(n+1));
366 var cl_pqa_series gseries;
368 gseries.pv = pv; gseries.qv = qv; gseries.qsv = NULL;
369 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
370 for (n = 0; n < N; n++) {
375 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
376 return shorten(result,len); // verkürzen und fertig
378 // Bit complexity (N = len): O(N^2).
379 // Memory consumption: O(N^2).
381 // Same algorithm as besselintegral2, but without quadratic memory consumption.
382 #define cl_rational_series_for_g cl_rational_series_for_besselintegral3_g
383 struct cl_rational_series_for_g : cl_pqa_series_stream {
388 static cl_pqa_series_term computenext (cl_pqa_series_stream& thisss)
390 var cl_rational_series_for_g& thiss = (cl_rational_series_for_g&)thisss;
391 var uintL n = thiss.n;
393 var cl_I Hu = gcd(thiss.HD,n+1);
394 var cl_I Hv = exquopos(n+1,Hu);
395 thiss.HN = thiss.HN*Hv + exquopos(thiss.HD,Hu);
396 thiss.HD = thiss.HD*Hv;
397 // Now HN/HD = H_{n+1}.
398 var cl_pqa_series_term result;
400 result.q = Hv*(cl_I)(n+1)*(cl_I)(n+1);
405 cl_rational_series_for_g (const cl_I& _x)
406 : cl_pqa_series_stream (cl_rational_series_for_g::computenext),
407 n (0), HN (0), HD (1), x (_x) {}
409 const cl_LF compute_eulerconst_besselintegral3 (uintC len)
411 var uintC actuallen = len+1; // 1 Schutz-Digit
412 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
413 var uintC N = (uintC)(3.591121477*sx);
414 var cl_I x = square((cl_I)sx);
416 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
417 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
420 init1(cl_I, pv[0]) (1);
421 init1(cl_I, qv[0]) (1);
422 for (n = 1; n < N; n++) {
423 init1(cl_I, pv[n]) (x);
424 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
426 var cl_pq_series fseries;
427 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
428 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
429 for (n = 0; n < N; n++) {
434 var cl_rational_series_for_g gseries = cl_rational_series_for_g(x);
435 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
436 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
437 return shorten(result,len); // verkürzen und fertig
439 // Bit complexity (N = len): O(N^2).
443 // Same algorithm as besselintegral1, but using binary splitting to evaluate
445 const cl_LF compute_eulerconst_besselintegral4 (uintC len)
447 var uintC actuallen = len+2; // 2 guard digits
448 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
449 var uintC N = (uintC)(3.591121477*sx);
450 var cl_I x = square(cl_I(sx));
451 struct rational_series_stream : cl_pqd_series_stream {
454 static cl_pqd_series_term computenext (cl_pqd_series_stream& thisss)
456 var rational_series_stream& thiss = (rational_series_stream&)thisss;
457 var uintC n = thiss.n;
458 var cl_pqd_series_term result;
460 result.q = square(cl_I(n+1));
465 rational_series_stream (uintC _n, const cl_I& _x)
466 : cl_pqd_series_stream (rational_series_stream::computenext),
469 var cl_pqd_series_result<cl_R> sums;
470 eval_pqd_series_aux(N,series,sums,actuallen);
471 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
472 // and then dividing them, to compute gsum/fsum, we save two
473 // divisions by computing V/(D*(Q+T)).
475 cl_R_to_LF(sums.V,actuallen)
476 / The(cl_LF)(sums.D * cl_R_to_LF(sums.Q+sums.T,actuallen))
477 - ln(cl_R_to_LF(sx,actuallen));
478 return shorten(result,len); // verkürzen und fertig
480 // Bit complexity (N = len): O(log(N)^2*M(N)).
482 // Timings of the above algorithms, on an i486 33 MHz, running Linux.
483 // N exp exp1 exp2 bessel1 bessel2 bessel3 bessel4
484 // 10 0.51 0.28 0.52 0.11 0.16 0.16 0.15
485 // 25 2.23 0.83 2.12 0.36 0.62 0.63 0.62
486 // 50 6.74 2.23 6.54 0.95 1.95 1.97 1.95
487 // 100 19.1 6.74 20.6 2.96 6.47 6.42 6.3
488 // 250 84 37.4 78 16.3 33.6 32.0 28.8
489 // 500 230 136.5 206 60.5 --- 111 85
490 // 1000 591 520 536 229 --- 377 241
493 // 2500 1744 2108 (1268) 855 (run)
494 // 2500 1845 2192 (1269) 891 (real)
496 // asymp. FAST N^2 FAST N^2 N^2 N^2 FAST
497 // (FAST means O(log(N)^2*M(N)))
499 // The break-even point between "bessel1" and "bessel4" is at about N = 1050.
500 const cl_LF compute_eulerconst (uintC len)
503 return compute_eulerconst_besselintegral4(len);
505 return compute_eulerconst_besselintegral1(len);
508 const cl_LF eulerconst (uintC len)
510 var uintC oldlen = TheLfloat(cl_LF_eulerconst)->len; // vorhandene Länge
512 return shorten(cl_LF_eulerconst,len);
514 return cl_LF_eulerconst;
516 // TheLfloat(cl_LF_eulerconst)->len um mindestens einen konstanten Faktor
517 // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird:
518 var uintC newlen = len;
519 oldlen += floor(oldlen,2); // oldlen * 3/2
523 // gewünschte > vorhandene Länge -> muß nachberechnen:
524 cl_LF_eulerconst = compute_eulerconst(newlen);
525 return (len < newlen ? shorten(cl_LF_eulerconst,len) : cl_LF_eulerconst);