12 #include "cl_lfloat.h"
13 #include "cl_LF_tran.h"
15 #include "cl_integer.h"
16 #include "cl_alloca.h"
19 #if 0 // works, but besselintegral4 is always faster
21 const cl_LF compute_eulerconst_expintegral (uintC len)
23 // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
24 // Wiley 1987. Section 10.2.3, exercise 11, p. 336]
25 // Use the following formula for the modified exponential integral
26 // (valid for Re(z) > 0)
27 // E1(z) := integral(t = z..+infty, exp(-t)/t dt)
28 // E1(z) = - log z - C + sum(n=1..infty, (-1)^(n-1) z^n / (n*n!))
29 // [Hint for proving this formula:
30 // 1. Learn about the elementary properties of the Gamma function.
31 // 2. -C = derivative of Gamma at 1
32 // = lim_{z -> 0} (Gamma(z) - 1/z)
33 // = integral(t = 0..1, (exp(-t)-1)/t dt)
34 // + integral(t = 1..infty, exp(-t)/t dt)
36 // 0 = integral(t=0..1, 1/(t+1)) - integral(t=1..infty, 1/t(t+1) dt)
38 // -C = integral(t = 0..infty, (exp(-t)/t - 1/t(t+1)) dt)
39 // 4. Compute E1(z) + C and note that E1(z) + C + log z is the integral
40 // of an entire function, hence an entire function as well.]
41 // Of course we also have the estimate
42 // |E1(z)| < exp(-Re(z)).
43 // This means that we can get C by computing
44 // sum(n=1..infty, (-1)^(n-1) z^n / (n*n!)) - log z
46 // In order to get M bits of precision, we first choose z (real) such
47 // that exp(-z) < 2^-M. This will make |E1(z)| small enough. z should
48 // be chosen as an integer, this is the key to computing the series
49 // sum very fast. z = M*log(2) + O(1).
50 // Then we choose the number N of terms:
51 // Note than the n-th term's absolute value is (logarithmically)
52 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) + o(1).
53 // The derivative of this with respect to n is
54 // log(z) - log(n) - 3/(2n) + o(1/n),
55 // hence is increasing for n < z and decreasing for n > z.
56 // The maximum value is attained at n = z + O(1), and is z + O(log z),
57 // which means that we need z/log(2) + O(log z) bits before the
59 // We can cut off the series when
60 // n*log(z) - n*log(n) + n - 3/2*log(n) - log(sqrt(2 pi)) < -M*log(2)
61 // This happens at n = alpha*z - 3/(2*log(alpha))*log(z) + O(1),
62 // where alpha = 3.591121477... is the solution of
63 // -alpha*log(alpha) + alpha + 1 = 0.
64 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
65 // compute this number.]
66 // Finally we compute the series's sum as
67 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
68 // with a(n) = 1, b(n) = n+1, p(n) = z for n=0, -z for n>0, q(n) = n+1.
69 // If we computed this with floating-point numbers, we would have
70 // to more than double the floating-point precision because of the large
71 // extinction which takes place. But luckily we compute with integers.
72 var uintC actuallen = len+1; // 1 Schutz-Digit
73 var uintL z = (uintL)(0.693148*intDsize*actuallen)+1;
74 var uintL N = (uintL)(3.591121477*z);
76 var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
77 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
78 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
80 for (n = 0; n < N; n++) {
81 init1(cl_I, bv[n]) (n+1);
82 init1(cl_I, pv[n]) (n==0 ? (cl_I)z : -(cl_I)z);
83 init1(cl_I, qv[n]) (n+1);
85 var cl_pqb_series series;
87 series.pv = pv; series.qv = qv; series.qsv = NULL;
88 var cl_LF fsum = eval_rational_series(N,series,actuallen);
89 for (n = 0; n < N; n++) {
94 fsum = fsum - ln(cl_I_to_LF(z,actuallen)); // log(z) subtrahieren
95 return shorten(fsum,len); // verkürzen und fertig
97 // Bit complexity (N = len): O(log(N)^2*M(N)).
101 #if 0 // works, but besselintegral1 is twice as fast
103 const cl_LF compute_eulerconst_expintegral1 (uintC len)
105 // Define f(z) := sum(n=0..infty, z^n/n!) = exp(z)
106 // and g(z) := sum(n=0..infty, H_n*z^n/n!)
107 // where H_n := 1/1 + 1/2 + ... + 1/n.
108 // The following formula can be proved:
109 // g'(z) - g(z) = (exp(z)-1)/z,
110 // g(z) = exp(z)*(log(z) + c3 + integral(t=..z, exp(-t)/t dt))
111 // The Laplace method for determining the asymptotics of an integral
112 // or sum yields for real x>0 (the terms n = x+O(x^(1/2)) are
114 // f(x) = exp(x)*(1 + O(x^(-1/2)))
115 // g(x) = exp(x)*(log(x) + C + O(log(x)*x^(-1/2)))
117 // g(x)/f(x) - log(x) - C = O(log(x)*x^(-1/2))
118 // This determines the constant c3, we thus have
119 // g(z) = exp(z)*(log(z) + C + integral(t=z..infty, exp(-t)/t dt))
120 // Hence we have for x -> infty:
121 // g(x)/f(x) - log(x) - C == O(exp(-x))
122 // This means that we can get C by computing
123 // g(x)/f(x) - log(x)
125 // In order to get M bits of precision, we first choose x (real) such
126 // that exp(-x) < 2^-M. This will make the absolute value of the
127 // integral small enough. x should be chosen as an integer, this is
128 // the key to computing the series sum very fast. x = M*log(2) + O(1).
129 // Then we choose the number N of terms:
130 // Note than the n-th term's absolute value is (logarithmically)
131 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) + o(1).
132 // The derivative of this with respect to n is
133 // log(x) - log(n) - 1/2n + o(1/n),
134 // hence is increasing for n < x and decreasing for n > x.
135 // The maximum value is attained at n = x + O(1), and is
136 // x + O(log x), which means that we need x/log(2) + O(log x)
137 // bits before the decimal point. This also follows from the
138 // asymptotic estimate for f(x).
139 // We can cut off the series when the relative error is < 2^-M,
140 // i.e. when the absolute error is < 2^-M*exp(x), i.e.
141 // n*log(x) - n*log(n) + n - 1/2*log(n) - 1/2*log(2 pi) <
143 // This happens at n = e*x - 1/2*log(x) + O(1).
144 // Finally we compute the sums of the series f(x) and g(x) with N terms
146 // We compute f(x) classically and g(x) using the partial sums of f(x).
147 var uintC actuallen = len+2; // 2 Schutz-Digits
148 var uintL x = (uintL)(0.693148*intDsize*actuallen)+1;
149 var uintL N = (uintL)(2.718281828*x);
150 var cl_LF one = cl_I_to_LF(1,actuallen);
151 var cl_LF fterm = one;
152 var cl_LF fsum = fterm;
153 var cl_LF gterm = cl_I_to_LF(0,actuallen);
154 var cl_LF gsum = gterm;
157 // fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!,
158 // gterm = H_n*x^n/n!, gsum = H_1*x/1! + ... + H_n*x^n/n!.
159 for (n = 1; n < N; n++) {
160 fterm = The(cl_LF)(fterm*x)/n;
161 gterm = (The(cl_LF)(gterm*x) + fterm)/n;
162 if (len < 10 || n <= x) {
166 // For n > x, the terms are decreasing.
167 // So we can reduce the precision accordingly.
168 fterm = cl_LF_shortenwith(fterm,one);
169 gterm = cl_LF_shortenwith(gterm,one);
170 fsum = fsum + LF_to_LF(fterm,actuallen);
171 gsum = gsum + LF_to_LF(gterm,actuallen);
174 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(x,actuallen));
175 return shorten(result,len); // verkürzen und fertig
177 // Bit complexity (N = len): O(N^2).
181 #if 0 // works, but besselintegral4 is always faster
183 // Same algorithm as expintegral1, but using binary splitting to evaluate
185 const cl_LF compute_eulerconst_expintegral2 (uintC len)
187 var uintC actuallen = len+2; // 2 Schutz-Digits
188 var uintL x = (uintL)(0.693148*intDsize*actuallen)+1;
189 var uintL N = (uintL)(2.718281828*x);
191 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
193 for (n = 0; n < N; n++) {
194 init1(cl_I, args[n].p) (x);
195 init1(cl_I, args[n].q) (n+1);
196 init1(cl_I, args[n].d) (n+1);
198 var cl_pqd_series_result sums;
199 eval_pqd_series_aux(N,args,sums);
200 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
201 // and then dividing them, to compute gsum/fsum, we save two
202 // divisions by computing V/(D*(Q+T)).
204 cl_I_to_LF(sums.V,actuallen)
205 / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen))
206 - ln(cl_I_to_LF(x,actuallen));
207 for (n = 0; n < N; n++) {
212 return shorten(result,len); // verkürzen und fertig
214 // Bit complexity (N = len): O(log(N)^2*M(N)).
218 // cl_LF compute_eulerconst_besselintegral (uintC len)
219 // This is basically the algorithm used in Pari.
220 // Define f(z) := sum(n=0..infty, z^n/n!^2)
221 // and g(z) := sum(n=0..infty, H_n*z^n/n!^2)
222 // where H_n := 1/1 + 1/2 + ... + 1/n.
223 // [f(z) and g(z) are intimately related to the Bessel functions:
224 // f(x^2) = I_0(2*x), g(x^2) = K_0(2*x) + I_0(2*x*)*(C + log(x)).]
225 // The following formulas can be proved:
226 // z f''(z) + f'(z) - f(z) = 0,
227 // z g''(z) + g'(z) - g(z) = f'(z),
228 // g(z) = (log(z)/2 + c3 - 1/2 integral(t=..z, 1/(t f(t)^2) dt)) f(z)
229 // The Laplace method for determining the asymptotics of an integral
230 // or sum yields for real x>0 (the terms n = sqrt(x)+O(x^(1/4)) are
232 // f(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*(1 + O(x^(-1/4)))
233 // g(x) = exp(2*sqrt(x))*x^(-1/4)*1/(2*sqrt(pi))*
234 // (1/2*log(x) + C + O(log(x)*x^(-1/4)))
236 // g(x)/f(x) - 1/2*log(x) - C = O(log(x)*x^(-1/4))
237 // This determines the constant c3, we thus have
238 // g(z)= (log(z)/2 + C + 1/2 integral(t=z..infty, 1/(t f(t)^2) dt)) f(z)
239 // Hence we have for x -> infty:
240 // g(x)/f(x) - 1/2*log(x) - C == pi*exp(-4*sqrt(x)) [approx.]
241 // This means that we can get C by computing
242 // g(x)/f(x) - 1/2*log(x)
244 // In order to get M bits of precision, we first choose x (real) such
245 // that exp(-4*sqrt(x)) < 2^-(M+2). This will make the absolute value
246 // of the integral small enough. x should be chosen as an integer,
247 // this is the key to computing the series sum very fast. sqrt(x)
248 // need not be an integer. Set sx = sqrt(x).
249 // sx = M*log(2)/4 + O(1).
250 // Then we choose the number N of terms:
251 // Note than the n-th term's absolute value is (logarithmically)
252 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) + o(1).
253 // The derivative of this with respect to n is
254 // log(x) - 2*log(n) - 1/n + o(1/n),
255 // hence is increasing for n < sx and decreasing for n > sx.
256 // The maximum value is attained at n = sx + O(1), and is
257 // 2*sx + O(log x), which means that we need 2*sx/log(2) + O(log x)
258 // bits before the decimal point. This also follows from the
259 // asymptotic estimate for f(x).
260 // We can cut off the series when the relative error is < 2^-M,
261 // i.e. when the absolute error is
262 // < 2^-M*exp(2*sx)*sx^(-1/2)*1/(2*sqrt(pi)),
264 // n*log(x) - 2*n*log(n) + 2*n - log(n) - log(2 pi) <
265 // < -M*log(2) + 2*sx - 1/2*log(sx) - log(2 sqrt(pi))
266 // This happens at n = alpha*sx - 1/(4*log(alpha))*log(sx) + O(1),
267 // where alpha = 3.591121477... is the solution of
268 // -alpha*log(alpha) + alpha + 1 = 0.
269 // [Use the Newton iteration alpha --> (alpha+1)/log(alpha) to
270 // compute this number.]
271 // Finally we compute the sums of the series f(x) and g(x) with N terms
274 const cl_LF compute_eulerconst_besselintegral1 (uintC len)
276 // We compute f(x) classically and g(x) using the partial sums of f(x).
277 var uintC actuallen = len+1; // 1 Schutz-Digit
278 var uintL sx = (uintL)(0.25*0.693148*intDsize*actuallen)+1;
279 var uintL N = (uintL)(3.591121477*sx);
280 var cl_I x = square((cl_I)sx);
281 var cl_LF eps = scale_float(cl_I_to_LF(1,LF_minlen),-(sintL)(sx*2.88539+10));
282 var cl_LF fterm = cl_I_to_LF(1,actuallen);
283 var cl_LF fsum = fterm;
284 var cl_LF gterm = cl_I_to_LF(0,actuallen);
285 var cl_LF gsum = gterm;
288 // fterm = x^n/n!^2, fsum = 1 + x/1!^2 + ... + x^n/n!^2,
289 // gterm = H_n*x^n/n!^2, gsum = H_1*x/1!^2 + ... + H_n*x^n/n!^2.
290 for (n = 1; n < N; n++) {
291 fterm = The(cl_LF)(fterm*x)/square((cl_I)n);
292 gterm = (The(cl_LF)(gterm*x)/(cl_I)n + fterm)/(cl_I)n;
293 if (len < 10 || n <= sx) {
297 // For n > sx, the terms are decreasing.
298 // So we can reduce the precision accordingly.
299 fterm = cl_LF_shortenwith(fterm,eps);
300 gterm = cl_LF_shortenwith(gterm,eps);
301 fsum = fsum + LF_to_LF(fterm,actuallen);
302 gsum = gsum + LF_to_LF(gterm,actuallen);
305 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
306 return shorten(result,len); // verkürzen und fertig
308 // Bit complexity (N = len): O(N^2).
310 #if 0 // works, but besselintegral1 is faster
312 const cl_LF compute_eulerconst_besselintegral2 (uintC len)
314 // We compute the sum of the series f(x) as
315 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
316 // with a(n) = 1, b(n) = 1, p(n) = x for n>0, q(n) = n^2 for n>0.
317 // and the sum of the series g(x) as
318 // sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
320 // a(n) = HN_{n+1}, b(n) = 1, p(n) = x, q(n) = (n+1)^2 * HD_{n+1}/HD_{n}
321 // where HD_n := lcm(1,...,n) and HN_n := HD_n * H_n. (Note that
322 // HD_n need not be the lowest possible denominator of H_n. For
323 // example, n=6: H_6 = 49/20, but HD_6 = 60.)
324 // WARNING: The memory used by this algorithm grown quadratically in N.
325 // (Because HD_n grows like exp(n), hence HN_n grows like exp(n) as
326 // well, and we store all HN_n values in an array!)
327 var uintC actuallen = len+1; // 1 Schutz-Digit
328 var uintL sx = (uintL)(0.25*0.693148*intDsize*actuallen)+1;
329 var uintL N = (uintL)(3.591121477*sx);
330 var cl_I x = square((cl_I)sx);
332 var cl_I* av = (cl_I*) cl_alloca(N*sizeof(cl_I));
333 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
334 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
337 init1(cl_I, pv[0]) (1);
338 init1(cl_I, qv[0]) (1);
339 for (n = 1; n < N; n++) {
340 init1(cl_I, pv[n]) (x);
341 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
343 var cl_pq_series fseries;
344 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
345 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
346 for (n = 0; n < N; n++) {
353 for (n = 0; n < N; n++) {
355 var cl_I Hu = gcd(HD,n+1);
356 var cl_I Hv = exquopos(n+1,Hu);
357 HN = HN*Hv + exquopos(HD,Hu);
359 // Now HN/HD = H_{n+1}.
360 init1(cl_I, av[n]) (HN);
361 init1(cl_I, pv[n]) (x);
362 init1(cl_I, qv[n]) (Hv*(cl_I)(n+1)*(cl_I)(n+1));
364 var cl_pqa_series gseries;
366 gseries.pv = pv; gseries.qv = qv; gseries.qsv = NULL;
367 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
368 for (n = 0; n < N; n++) {
373 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
374 return shorten(result,len); // verkürzen und fertig
376 // Bit complexity (N = len): O(N^2).
377 // Memory consumption: O(N^2).
379 // Same algorithm as besselintegral2, but without quadratic memory consumption.
380 #define cl_rational_series_for_g cl_rational_series_for_besselintegral3_g
381 struct cl_rational_series_for_g : cl_pqa_series_stream {
386 static cl_pqa_series_term computenext (cl_pqa_series_stream& thisss)
388 var cl_rational_series_for_g& thiss = (cl_rational_series_for_g&)thisss;
389 var uintL n = thiss.n;
391 var cl_I Hu = gcd(thiss.HD,n+1);
392 var cl_I Hv = exquopos(n+1,Hu);
393 thiss.HN = thiss.HN*Hv + exquopos(thiss.HD,Hu);
394 thiss.HD = thiss.HD*Hv;
395 // Now HN/HD = H_{n+1}.
396 var cl_pqa_series_term result;
398 result.q = Hv*(cl_I)(n+1)*(cl_I)(n+1);
403 cl_rational_series_for_g (const cl_I& _x)
404 : cl_pqa_series_stream (cl_rational_series_for_g::computenext),
405 n (0), HN (0), HD (1), x (_x) {}
407 const cl_LF compute_eulerconst_besselintegral3 (uintC len)
409 var uintC actuallen = len+1; // 1 Schutz-Digit
410 var uintL sx = (uintL)(0.25*0.693148*intDsize*actuallen)+1;
411 var uintL N = (uintL)(3.591121477*sx);
412 var cl_I x = square((cl_I)sx);
414 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
415 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
418 init1(cl_I, pv[0]) (1);
419 init1(cl_I, qv[0]) (1);
420 for (n = 1; n < N; n++) {
421 init1(cl_I, pv[n]) (x);
422 init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n);
424 var cl_pq_series fseries;
425 fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL;
426 var cl_LF fsum = eval_rational_series(N,fseries,actuallen);
427 for (n = 0; n < N; n++) {
432 var cl_rational_series_for_g gseries = cl_rational_series_for_g(x);
433 var cl_LF gsum = eval_rational_series(N,gseries,actuallen);
434 var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen));
435 return shorten(result,len); // verkürzen und fertig
437 // Bit complexity (N = len): O(N^2).
441 // Same algorithm as besselintegral1, but using binary splitting to evaluate
443 const cl_LF compute_eulerconst_besselintegral4 (uintC len)
445 var uintC actuallen = len+2; // 2 Schutz-Digits
446 var uintL sx = (uintL)(0.25*0.693148*intDsize*actuallen)+1;
447 var uintL N = (uintL)(3.591121477*sx);
448 var cl_I x = square((cl_I)sx);
450 var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
452 for (n = 0; n < N; n++) {
453 init1(cl_I, args[n].p) (x);
454 init1(cl_I, args[n].q) (square((cl_I)(n+1)));
455 init1(cl_I, args[n].d) (n+1);
457 var cl_pqd_series_result sums;
458 eval_pqd_series_aux(N,args,sums);
459 // Instead of computing fsum = 1 + T/Q and gsum = V/(D*Q)
460 // and then dividing them, to compute gsum/fsum, we save two
461 // divisions by computing V/(D*(Q+T)).
463 cl_I_to_LF(sums.V,actuallen)
464 / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen))
465 - ln(cl_I_to_LF(sx,actuallen));
466 for (n = 0; n < N; n++) {
471 return shorten(result,len); // verkürzen und fertig
473 // Bit complexity (N = len): O(log(N)^2*M(N)).
475 // Timings of the above algorithms, on an i486 33 MHz, running Linux.
476 // N exp exp1 exp2 bessel1 bessel2 bessel3 bessel4
477 // 10 0.51 0.28 0.52 0.11 0.16 0.16 0.15
478 // 25 2.23 0.83 2.12 0.36 0.62 0.63 0.62
479 // 50 6.74 2.23 6.54 0.95 1.95 1.97 1.95
480 // 100 19.1 6.74 20.6 2.96 6.47 6.42 6.3
481 // 250 84 37.4 78 16.3 33.6 32.0 28.8
482 // 500 230 136.5 206 60.5 --- 111 85
483 // 1000 591 520 536 229 --- 377 241
486 // 2500 1744 2108 (1268) 855 (run)
487 // 2500 1845 2192 (1269) 891 (real)
489 // asymp. FAST N^2 FAST N^2 N^2 N^2 FAST
490 // (FAST means O(log(N)^2*M(N)))
492 // The break-even point between "bessel1" and "bessel4" is at about N = 1050.
493 const cl_LF compute_eulerconst (uintC len)
496 return compute_eulerconst_besselintegral4(len);
498 return compute_eulerconst_besselintegral1(len);
501 const cl_LF cl_eulerconst (uintC len)
503 var uintC oldlen = TheLfloat(cl_LF_eulerconst)->len; // vorhandene Länge
505 return shorten(cl_LF_eulerconst,len);
507 return cl_LF_eulerconst;
509 // TheLfloat(cl_LF_eulerconst)->len um mindestens einen konstanten Faktor
510 // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird:
511 var uintC newlen = len;
512 oldlen += floor(oldlen,2); // oldlen * 3/2
516 // gewünschte > vorhandene Länge -> muß nachberechnen:
517 cl_LF_eulerconst = compute_eulerconst(newlen);
518 return (len < newlen ? shorten(cl_LF_eulerconst,len) : cl_LF_eulerconst);