12 #include "cln/lfloat.h"
13 #include "cl_LF_tran.h"
15 #include "cln/integer.h"
16 #include "cl_alloca.h"
20 #if 0 // works, but besselintegral4 is always faster
22 const cl_LF compute_eulerconst_expintegral (uintC len)
24 // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
25 // Wiley 1987. Section 10.2.3, exercise 11, p. 336]
26 // Use the following formula for the modified exponential integral
27 // (valid for Re(z) > 0)
28 // E1(z) := integral(t = z..+infty, exp(-t)/t dt)
29 // E1(z) = - log z - C + sum(n=1..infty, (-1)^(n-1) z^n / (n*n!))
30 // [Hint for proving this formula:
31 // 1. Learn about the elementary properties of the Gamma function.
32 // 2. -C = derivative of Gamma at 1
33 // = lim_{z -> 0} (Gamma(z) - 1/z)
34 // = integral(t = 0..1, (exp(-t)-1)/t dt)
35 // + integral(t = 1..infty, exp(-t)/t dt)
37 // 0 = integral(t=0..1, 1/(t+1)) - integral(t=1..infty, 1/t(t+1) dt)
39 // -C = integral(t = 0..infty, (exp(-t)/t - 1/t(t+1)) dt)
40 // 4. Compute E1(z) + C and note that E1(z) + C + log z is the integral
41 // of an entire function, hence an entire function as well.]
42 // Of course we also have the estimate
43 // |E1(z)| < exp(-Re(z)).
44 // This means that we can get C by computing
45 // sum(n=1..infty, (-1)^(n-1) z^n / (n*n!)) - log z
47 // In order to get M bits of precision, we first choose z (real) such
48 // that exp(-z) < 2^-M. This will make |E1(z)| small enough. z should
49 // be chosen as an integer, this is the key to computing the series
50 // sum very fast. z = M*log(2) + O(1).
51 // Then we choose the number N of terms:
52 // Note than the n-th term's absolute value is (logarithmically)
53 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) + o(1).
54 // The derivative of this with respect to n is
55 // log(z) - log(n) - 3/(2n) + o(1/n),
56 // hence is increasing for n < z and decreasing for n > z.
57 // The maximum value is attained at n = z + O(1), and is z + O(log z),
58 // which means that we need z/log(2) + O(log z) bits before the
60 // We can cut off the series when
61 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) < -M*log(2)
62 // This happens at n = alpha*z - 3/(2*log(alpha))*log(z) + O(1),
63 // where alpha = 3.591121477... is the solution of
64 // -alpha*log(alpha) + alpha + 1 = 0.
65 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
66 // compute this number.]
67 // Finally we compute the series's sum as
68 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
69 // with a(n) = 1, b(n) = n+1, p(n) = z for n=0, -z for n>0, q(n) = n+1.
70 // If we computed this with floating-point numbers, we would have
71 // to more than double the floating-point precision because of the large
72 // extinction which takes place. But luckily we compute with integers.
73 var uintC actuallen = len+1; // 1 Schutz-Digit
74 var uintC z = (uintC)(0.693148*intDsize*actuallen)+1;
75 var uintC N = (uintC)(3.591121477*z);
77 var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
78 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
79 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
81 for (n = 0; n < N; n++) {
82 init1(cl_I, bv[n]) (n+1);
83 init1(cl_I, pv[n]) (n==0 ? (cl_I)z : -(cl_I)z);
84 init1(cl_I, qv[n]) (n+1);
86 var cl_pqb_series series;
88 series.pv = pv; series.qv = qv; series.qsv = NULL;
89 var cl_LF fsum = eval_rational_series(N,series,actuallen);
90 for (n = 0; n < N; n++) {
95 fsum = fsum - ln(cl_I_to_LF(z,actuallen)); // log(z) subtrahieren
96 return shorten(fsum,len); // verkürzen und fertig
98 // Bit complexity (N = len): O(log(N)^2*M(N)).
102 #if 0 // works, but besselintegral1 is twice as fast
104 const cl_LF compute_eulerconst_expintegral1 (uintC len)
106 // Define f(z) := sum(n=0..infty, z^n/n!) = exp(z)
107 // and g(z) := sum(n=0..infty, H_n*z^n/n!)
108 // where H_n := 1/1 + 1/2 + ... + 1/n.
109 // The following formula can be proved:
110 // g'(z) - g(z) = (exp(z)-1)/z,
111 // g(z) = exp(z)*(log(z) + c3 + integral(t=..z, exp(-t)/t dt))
112 // The Laplace method for determining the asymptotics of an integral
113 // or sum yields for real x>0 (the terms n = x+O(x^(1/2)) are
115 // f(x) = exp(x)*(1 + O(x^(-1/2)))
116 // g(x) = exp(x)*(log(x) + C + O(log(x)*x^(-1/2)))
118 // g(x)/f(x) - log(x) - C = O(log(x)*x^(-1/2))
119 // This determines the constant c3, we thus have
120 // g(z) = exp(z)*(log(z) + C + integral(t=z..infty, exp(-t)/t dt))
121 // Hence we have for x -> infty:
122 // g(x)/f(x) - log(x) - C == O(exp(-x))
123 // This means that we can get C by computing
124 // g(x)/f(x) - log(x)
126 // In order to get M bits of precision, we first choose x (real) such
127 // that exp(-x) < 2^-M. This will make the absolute value of the
128 // integral small enough. x should be chosen as an integer, this is
129 // the key to computing the series sum very fast. x = M*log(2) + O(1).
130 // Then we choose the number N of terms:
131 // Note than the n-th term's absolute value is (logarithmically)
132 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) + o(1).
133 // The derivative of this with respect to n is
134 // log(x) - log(n) - 1/2n + o(1/n),
135 // hence is increasing for n < x and decreasing for n > x.
136 // The maximum value is attained at n = x + O(1), and is
137 // x + O(log x), which means that we need x/log(2) + O(log x)
138 // bits before the decimal point. This also follows from the
139 // asymptotic estimate for f(x).
140 // We can cut off the series when the relative error is < 2^-M,
141 // i.e. when the absolute error is < 2^-M*exp(x), i.e.
142 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) <
144 // This happens at n = e*x - 1/2*log(x) + O(1).
145 // Finally we compute the sums of the series f(x) and g(x) with N terms
147 // We compute f(x) classically and g(x) using the partial sums of f(x).
148 var uintC actuallen = len+2; // 2 Schutz-Digits
149 var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
150 var uintC N = (uintC)(2.718281828*x);
151 var cl_LF one = cl_I_to_LF(1,actuallen);
152 var cl_LF fterm = one;
153 var cl_LF fsum = fterm;
154 var cl_LF gterm = cl_I_to_LF(0,actuallen);
155 var cl_LF gsum = gterm;
158 // fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!,
159 // gterm = H_n*x^n/n!, gsum = H_1*x/1! + ... + H_n*x^n/n!.
160 for (n = 1; n < N; n++) {
161 fterm = The(cl_LF)(fterm*x)/n;
162 gterm = (The(cl_LF)(gterm*x) + fterm)/n;
163 if (len < 10 || n <= x) {
167 // For n > x, the terms are decreasing.
168 // So we can reduce the precision accordingly.
169 fterm = cl_LF_shortenwith(fterm,one);
170 gterm = cl_LF_shortenwith(gterm,one);
171 fsum = fsum + LF_to_LF(fterm,actuallen);
172 gsum = gsum + LF_to_LF(gterm,actuallen);
175 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(x,actuallen));
176 return shorten(result,len); // verkürzen und fertig
178 // Bit complexity (N = len): O(N^2).
182 #if 0 // works, but besselintegral4 is always faster
184 // Same algorithm as expintegral1, but using binary splitting to evaluate
186 const cl_LF compute_eulerconst_expintegral2 (uintC len)
188 var uintC actuallen = len+2; // 2 Schutz-Digits
189 var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
190 var uintC N = (uintC)(2.718281828*x);
192 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
194 for (n = 0; n < N; n++) {
195 init1(cl_I, args[n].p) (x);
196 init1(cl_I, args[n].q) (n+1);
197 init1(cl_I, args[n].d) (n+1);
199 var cl_pqd_series_result sums;
200 eval_pqd_series_aux(N,args,sums);
201 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
202 // and then dividing them, to compute gsum/fsum, we save two
203 // divisions by computing V/(D*(Q+T)).
205 cl_I_to_LF(sums.V,actuallen)
206 / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen))
207 - ln(cl_I_to_LF(x,actuallen));
208 for (n = 0; n < N; n++) {
213 return shorten(result,len); // verkürzen und fertig
215 // Bit complexity (N = len): O(log(N)^2*M(N)).
219 // cl_LF compute_eulerconst_besselintegral (uintC len)
220 // This is basically the algorithm used in Pari.
221 // Define f(z) := sum(n=0..infty, z^n/n!^2)
222 // and g(z) := sum(n=0..infty, H_n*z^n/n!^2)
223 // where H_n := 1/1 + 1/2 + ... + 1/n.
224 // [f(z) and g(z) are intimately related to the Bessel functions:
225 // f(x^2) = I_0(2*x), g(x^2) = K_0(2*x) + I_0(2*x*)*(C + log(x)).]
226 // The following formulas can be proved:
227 // z f''(z) + f'(z) - f(z) = 0,
228 // z g''(z) + g'(z) - g(z) = f'(z),
229 // g(z) = (log(z)/2 + c3 - 1/2 integral(t=..z, 1/(t f(t)^2) dt)) f(z)
230 // The Laplace method for determining the asymptotics of an integral
231 // or sum yields for real x>0 (the terms n = sqrt(x)+O(x^(1/4)) are
233 // f(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*(1 + O(x^(-1/4)))
234 // g(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*
235 // (1/2*log(x) + C + O(log(x)*x^(-1/4)))
237 // g(x)/f(x) - 1/2*log(x) - C = O(log(x)*x^(-1/4))
238 // This determines the constant c3, we thus have
239 // g(z)= (log(z)/2 + C + 1/2 integral(t=z..infty, 1/(t f(t)^2) dt)) f(z)
240 // Hence we have for x -> infty:
241 // g(x)/f(x) - 1/2*log(x) - C == pi*exp(-4*sqrt(x)) [approx.]
242 // This means that we can get C by computing
243 // g(x)/f(x) - 1/2*log(x)
245 // In order to get M bits of precision, we first choose x (real) such
246 // that exp(-4*sqrt(x)) < 2^-(M+2). This will make the absolute value
247 // of the integral small enough. x should be chosen as an integer,
248 // this is the key to computing the series sum very fast. sqrt(x)
249 // need not be an integer. Set sx = sqrt(x).
250 // sx = M*log(2)/4 + O(1).
251 // Then we choose the number N of terms:
252 // Note than the n-th term's absolute value is (logarithmically)
253 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) + o(1).
254 // The derivative of this with respect to n is
255 // log(x) - 2*log(n) - 1/n + o(1/n),
256 // hence is increasing for n < sx and decreasing for n > sx.
257 // The maximum value is attained at n = sx + O(1), and is
258 // 2*sx + O(log x), which means that we need 2*sx/log(2) + O(log x)
259 // bits before the decimal point. This also follows from the
260 // asymptotic estimate for f(x).
261 // We can cut off the series when the relative error is < 2^-M,
262 // i.e. when the absolute error is
263 // < 2^-M*exp(2*sx)*sx^(-1/2)*1/(2*sqrt(pi)),
265 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) <
266 // < -M*log(2) + 2*sx - 1/2*log(sx) - log(2 sqrt(pi))
267 // This happens at n = alpha*sx - 1/(4*log(alpha))*log(sx) + O(1),
268 // where alpha = 3.591121477... is the solution of
269 // -alpha*log(alpha) + alpha + 1 = 0.
270 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
271 // compute this number.]
272 // Finally we compute the sums of the series f(x) and g(x) with N terms
275 const cl_LF compute_eulerconst_besselintegral1 (uintC len)
277 // We compute f(x) classically and g(x) using the partial sums of f(x).
278 var uintC actuallen = len+1; // 1 Schutz-Digit
279 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
280 var uintC N = (uintC)(3.591121477*sx);
281 var cl_I x = square((cl_I)sx);
282 var cl_LF eps = scale_float(cl_I_to_LF(1,LF_minlen),-(sintC)(sx*2.88539+10));
283 var cl_LF fterm = cl_I_to_LF(1,actuallen);
284 var cl_LF fsum = fterm;
285 var cl_LF gterm = cl_I_to_LF(0,actuallen);
286 var cl_LF gsum = gterm;
289 // fterm = x^n/n!^2, fsum = 1 + x/1!^2 + ... + x^n/n!^2,
290 // gterm = H_n*x^n/n!^2, gsum = H_1*x/1!^2 + ... + H_n*x^n/n!^2.
291 for (n = 1; n < N; n++) {
292 fterm = The(cl_LF)(fterm*x)/square((cl_I)n);
293 gterm = (The(cl_LF)(gterm*x)/(cl_I)n + fterm)/(cl_I)n;
294 if (len < 10 || n <= sx) {
298 // For n > sx, the terms are decreasing.
299 // So we can reduce the precision accordingly.
300 fterm = cl_LF_shortenwith(fterm,eps);
301 gterm = cl_LF_shortenwith(gterm,eps);
302 fsum = fsum + LF_to_LF(fterm,actuallen);
303 gsum = gsum + LF_to_LF(gterm,actuallen);
306 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
307 return shorten(result,len); // verkürzen und fertig
309 // Bit complexity (N = len): O(N^2).
311 #if 0 // works, but besselintegral1 is faster
313 const cl_LF compute_eulerconst_besselintegral2 (uintC len)
315 // We compute the sum of the series f(x) as
316 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
317 // with a(n) = 1, b(n) = 1, p(n) = x for n>0, q(n) = n^2 for n>0.
318 // and the sum of the series g(x) as
319 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
321 // a(n) = HN_{n+1}, b(n) = 1, p(n) = x, q(n) = (n+1)^2 * HD_{n+1}/HD_{n}
322 // where HD_n := lcm(1,...,n) and HN_n := HD_n * H_n. (Note that
323 // HD_n need not be the lowest possible denominator of H_n. For
324 // example, n=6: H_6 = 49/20, but HD_6 = 60.)
325 // WARNING: The memory used by this algorithm grown quadratically in N.
326 // (Because HD_n grows like exp(n), hence HN_n grows like exp(n) as
327 // well, and we store all HN_n values in an array!)
328 var uintC actuallen = len+1; // 1 Schutz-Digit
329 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
330 var uintC N = (uintC)(3.591121477*sx);
331 var cl_I x = square((cl_I)sx);
333 var cl_I* av = (cl_I*) cl_alloca(N*sizeof(cl_I));
334 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
335 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
338 init1(cl_I, pv[0]) (1);
339 init1(cl_I, qv[0]) (1);
340 for (n = 1; n < N; n++) {
341 init1(cl_I, pv[n]) (x);
342 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
344 var cl_pq_series fseries;
345 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
346 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
347 for (n = 0; n < N; n++) {
354 for (n = 0; n < N; n++) {
356 var cl_I Hu = gcd(HD,n+1);
357 var cl_I Hv = exquopos(n+1,Hu);
358 HN = HN*Hv + exquopos(HD,Hu);
360 // Now HN/HD = H_{n+1}.
361 init1(cl_I, av[n]) (HN);
362 init1(cl_I, pv[n]) (x);
363 init1(cl_I, qv[n]) (Hv*(cl_I)(n+1)*(cl_I)(n+1));
365 var cl_pqa_series gseries;
367 gseries.pv = pv; gseries.qv = qv; gseries.qsv = NULL;
368 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
369 for (n = 0; n < N; n++) {
374 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
375 return shorten(result,len); // verkürzen und fertig
377 // Bit complexity (N = len): O(N^2).
378 // Memory consumption: O(N^2).
380 // Same algorithm as besselintegral2, but without quadratic memory consumption.
381 #define cl_rational_series_for_g cl_rational_series_for_besselintegral3_g
382 struct cl_rational_series_for_g : cl_pqa_series_stream {
387 static cl_pqa_series_term computenext (cl_pqa_series_stream& thisss)
389 var cl_rational_series_for_g& thiss = (cl_rational_series_for_g&)thisss;
390 var uintL n = thiss.n;
392 var cl_I Hu = gcd(thiss.HD,n+1);
393 var cl_I Hv = exquopos(n+1,Hu);
394 thiss.HN = thiss.HN*Hv + exquopos(thiss.HD,Hu);
395 thiss.HD = thiss.HD*Hv;
396 // Now HN/HD = H_{n+1}.
397 var cl_pqa_series_term result;
399 result.q = Hv*(cl_I)(n+1)*(cl_I)(n+1);
404 cl_rational_series_for_g (const cl_I& _x)
405 : cl_pqa_series_stream (cl_rational_series_for_g::computenext),
406 n (0), HN (0), HD (1), x (_x) {}
408 const cl_LF compute_eulerconst_besselintegral3 (uintC len)
410 var uintC actuallen = len+1; // 1 Schutz-Digit
411 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
412 var uintC N = (uintC)(3.591121477*sx);
413 var cl_I x = square((cl_I)sx);
415 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
416 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
419 init1(cl_I, pv[0]) (1);
420 init1(cl_I, qv[0]) (1);
421 for (n = 1; n < N; n++) {
422 init1(cl_I, pv[n]) (x);
423 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
425 var cl_pq_series fseries;
426 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
427 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
428 for (n = 0; n < N; n++) {
433 var cl_rational_series_for_g gseries = cl_rational_series_for_g(x);
434 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
435 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
436 return shorten(result,len); // verkürzen und fertig
438 // Bit complexity (N = len): O(N^2).
442 // Same algorithm as besselintegral1, but using binary splitting to evaluate
444 const cl_LF compute_eulerconst_besselintegral4 (uintC len)
446 var uintC actuallen = len+2; // 2 Schutz-Digits
447 var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1;
448 var uintC N = (uintC)(3.591121477*sx);
449 var cl_I x = square((cl_I)sx);
451 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
453 for (n = 0; n < N; n++) {
454 init1(cl_I, args[n].p) (x);
455 init1(cl_I, args[n].q) (square((cl_I)(n+1)));
456 init1(cl_I, args[n].d) (n+1);
458 var cl_pqd_series_result sums;
459 eval_pqd_series_aux(N,args,sums);
460 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
461 // and then dividing them, to compute gsum/fsum, we save two
462 // divisions by computing V/(D*(Q+T)).
464 cl_I_to_LF(sums.V,actuallen)
465 / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen))
466 - ln(cl_I_to_LF(sx,actuallen));
467 for (n = 0; n < N; n++) {
472 return shorten(result,len); // verkürzen und fertig
474 // Bit complexity (N = len): O(log(N)^2*M(N)).
476 // Timings of the above algorithms, on an i486 33 MHz, running Linux.
477 // N exp exp1 exp2 bessel1 bessel2 bessel3 bessel4
478 // 10 0.51 0.28 0.52 0.11 0.16 0.16 0.15
479 // 25 2.23 0.83 2.12 0.36 0.62 0.63 0.62
480 // 50 6.74 2.23 6.54 0.95 1.95 1.97 1.95
481 // 100 19.1 6.74 20.6 2.96 6.47 6.42 6.3
482 // 250 84 37.4 78 16.3 33.6 32.0 28.8
483 // 500 230 136.5 206 60.5 --- 111 85
484 // 1000 591 520 536 229 --- 377 241
487 // 2500 1744 2108 (1268) 855 (run)
488 // 2500 1845 2192 (1269) 891 (real)
490 // asymp. FAST N^2 FAST N^2 N^2 N^2 FAST
491 // (FAST means O(log(N)^2*M(N)))
493 // The break-even point between "bessel1" and "bessel4" is at about N = 1050.
494 const cl_LF compute_eulerconst (uintC len)
497 return compute_eulerconst_besselintegral4(len);
499 return compute_eulerconst_besselintegral1(len);
502 const cl_LF eulerconst (uintC len)
504 var uintC oldlen = TheLfloat(cl_LF_eulerconst)->len; // vorhandene Länge
506 return shorten(cl_LF_eulerconst,len);
508 return cl_LF_eulerconst;
510 // TheLfloat(cl_LF_eulerconst)->len um mindestens einen konstanten Faktor
511 // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird:
512 var uintC newlen = len;
513 oldlen += floor(oldlen,2); // oldlen * 3/2
517 // gewünschte > vorhandene Länge -> muß nachberechnen:
518 cl_LF_eulerconst = compute_eulerconst(newlen);
519 return (len < newlen ? shorten(cl_LF_eulerconst,len) : cl_LF_eulerconst);