X-Git-Url: https://ginac.de/CLN/cln.git//cln.git?a=blobdiff_plain;f=src%2Ffloat%2Ftranscendental%2Fcl_LF_catalanconst.cc;h=a68219b6262c8c03634579c898046e10158a6f87;hb=3af2cde18b3aabed4c808b0113daa81c2263b0bd;hp=866c9566eccf48f5b241e04ec475f82935c5c004;hpb=850abfde7f0d985ba01526c346bcd0d733562943;p=cln.git diff --git a/src/float/transcendental/cl_LF_catalanconst.cc b/src/float/transcendental/cl_LF_catalanconst.cc index 866c956..a68219b 100644 --- a/src/float/transcendental/cl_LF_catalanconst.cc +++ b/src/float/transcendental/cl_LF_catalanconst.cc @@ -1,19 +1,19 @@ // catalanconst(). // General includes. -#include "cl_sysdep.h" +#include "base/cl_sysdep.h" // Specification. -#include "cl_F_tran.h" +#include "float/transcendental/cl_F_tran.h" // Implementation. #include "cln/lfloat.h" -#include "cl_LF_tran.h" -#include "cl_LF.h" +#include "float/transcendental/cl_LF_tran.h" +#include "float/lfloat/cl_LF.h" #include "cln/integer.h" -#include "cl_alloca.h" +#include "base/cl_alloca.h" namespace cln { @@ -26,8 +26,8 @@ const cl_LF compute_catalanconst_ramanujan (uintC len) // Every summand gives 0.6 new decimal digits in precision. // The sum is best evaluated using fixed-point arithmetic, // so that the precision is reduced for the later summands. - var uintL actuallen = len + 2; // 2 Schutz-Digits - var sintL scale = intDsize*actuallen; + var uintC actuallen = len + 2; // 2 guard digits + var sintC scale = intDsize*actuallen; var cl_I sum = 0; var cl_I n = 0; var cl_I factor = ash(1,scale); @@ -42,7 +42,7 @@ const cl_LF compute_catalanconst_ramanujan (uintC len) + The(cl_LF)(pi(actuallen)) * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))), -3); - return shorten(g,len); // verkürzen und fertig + return shorten(g,len); // verkürzen und fertig } // Bit complexity (N := len): O(N^2). @@ -50,55 +50,57 @@ const cl_LF compute_catalanconst_ramanujan_fast (uintC len) { // Same formula as above, using a binary splitting evaluation. // See [Borwein, Borwein, section 10.2.3]. - var uintL actuallen = len + 2; // 2 Schutz-Digits + struct rational_series_stream : cl_pqb_series_stream { + cl_I n; + static cl_pqb_series_term computenext (cl_pqb_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var cl_I n = thiss.n; + var cl_pqb_series_term result; + if (n==0) { + result.p = 1; + result.q = 1; + result.b = 1; + } else { + result.p = n; + result.b = 2*n+1; + result.q = result.b << 1; // 2*(2*n+1) + } + thiss.n = n+1; + return result; + } + rational_series_stream () + : cl_pqb_series_stream (rational_series_stream::computenext), + n (0) {} + } series; + var uintC actuallen = len + 2; // 2 guard digits // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n))) // with appropriate N, and // a(n) = 1, b(n) = 2*n+1, // p(n) = n for n>0, q(n) = 2*(2*n+1) for n>0. - var uintL N = (intDsize/2)*actuallen; + var uintC N = (intDsize/2)*actuallen; // 4^-N <= 2^(-intDsize*actuallen). - CL_ALLOCA_STACK; - var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintL n; - init1(cl_I, bv[0]) (1); - init1(cl_I, pv[0]) (1); - init1(cl_I, qv[0]) (1); - for (n = 1; n < N; n++) { - init1(cl_I, bv[n]) (2*n+1); - init1(cl_I, pv[n]) (n); - init1(cl_I, qv[n]) (2*(2*n+1)); - } - var cl_pqb_series series; - series.bv = bv; - series.pv = pv; series.qv = qv; series.qsv = NULL; - var cl_LF fsum = eval_rational_series(N,series,actuallen); - for (n = 0; n < N; n++) { - bv[n].~cl_I(); - pv[n].~cl_I(); - qv[n].~cl_I(); - } + var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); var cl_LF g = scale_float(The(cl_LF)(3*fsum) + The(cl_LF)(pi(actuallen)) * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))), -3); - return shorten(g,len); // verkürzen und fertig + return shorten(g,len); // verkürzen und fertig } // Bit complexity (N := len): O(log(N)^2*M(N)). const cl_LF compute_catalanconst_expintegral1 (uintC len) { // We compute f(x) classically and g(x) using the partial sums of f(x). - var uintC actuallen = len+2; // 2 Schutz-Digits - var uintL x = (uintL)(0.693148*intDsize*actuallen)+1; - var uintL N = (uintL)(2.718281828*x); + var uintC actuallen = len+2; // 2 guard digits + var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; + var uintC N = (uintC)(2.718281828*x); var cl_LF fterm = cl_I_to_LF(1,actuallen); var cl_LF fsum = fterm; var cl_LF gterm = fterm; var cl_LF gsum = gterm; - var uintL n; + var uintC n; // After n loops // fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!, // gterm = S_n*x^n/n!, gsum = S_0*x^0/0! + ... + S_n*x^n/n!. @@ -113,7 +115,7 @@ const cl_LF compute_catalanconst_expintegral1 (uintC len) gsum = gsum + gterm; } var cl_LF result = gsum/fsum; - return shorten(result,len); // verkürzen und fertig + return shorten(result,len); // verkürzen und fertig } // Bit complexity (N = len): O(N^2). @@ -121,12 +123,12 @@ const cl_LF compute_catalanconst_expintegral1 (uintC len) // the sums. const cl_LF compute_catalanconst_expintegral2 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits - var uintL x = (uintL)(0.693148*intDsize*actuallen)+1; - var uintL N = (uintL)(2.718281828*x); + var uintC actuallen = len+2; // 2 guard digits + var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; + var uintC N = (uintC)(2.718281828*x); CL_ALLOCA_STACK; var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term)); - var uintL n; + var uintC n; for (n = 0; n < N; n++) { if (n==0) { init1(cl_I, args[n].p) (1); @@ -145,21 +147,21 @@ const cl_LF compute_catalanconst_expintegral2 (uintC len) args[n].q.~cl_I(); args[n].d.~cl_I(); } - return shorten(result,len); // verkürzen und fertig + return shorten(result,len); // verkürzen und fertig } // Bit complexity (N = len): O(log(N)^2*M(N)). // Using Cohen-Villegas-Zagier acceleration, but without binary splitting. const cl_LF compute_catalanconst_cvz1 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits - var uintL N = (uintL)(0.39321985*intDsize*actuallen)+1; + var uintC actuallen = len+2; // 2 guard digits + var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1; #if 0 var cl_LF fterm = cl_I_to_LF(2*(cl_I)N*(cl_I)N,actuallen); var cl_LF fsum = fterm; var cl_LF gterm = fterm; var cl_LF gsum = gterm; - var uintL n; + var uintC n; // After n loops // fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm, // gterm = S_n*fterm, gsum = ... + gterm. @@ -180,7 +182,7 @@ const cl_LF compute_catalanconst_cvz1 (uintC len) var cl_I fsum = fterm; var cl_LF gterm = cl_I_to_LF(fterm,actuallen); var cl_LF gsum = gterm; - var uintL n; + var uintC n; // After n loops // fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm, // gterm = S_n*fterm, gsum = ... + gterm. @@ -196,18 +198,18 @@ const cl_LF compute_catalanconst_cvz1 (uintC len) } var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen); #endif - return shorten(result,len); // verkürzen und fertig + return shorten(result,len); // verkürzen und fertig } // Bit complexity (N = len): O(N^2). // Using Cohen-Villegas-Zagier acceleration, with binary splitting. const cl_LF compute_catalanconst_cvz2 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits - var uintL N = (uintL)(0.39321985*intDsize*actuallen)+1; + var uintC actuallen = len+2; // 2 guard digits + var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1; CL_ALLOCA_STACK; var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term)); - var uintL n; + var uintC n; for (n = 0; n < N; n++) { init1(cl_I, args[n].p) (2*(cl_I)(N-n)*(cl_I)(N+n)); init1(cl_I, args[n].q) ((cl_I)(2*n+1)*(cl_I)(n+1)); @@ -215,7 +217,7 @@ const cl_LF compute_catalanconst_cvz2 (uintC len) ? square((cl_I)(2*n+1)) : -square((cl_I)(2*n+1))); } - var cl_pqd_series_result sums; + var cl_pqd_series_result sums; eval_pqd_series_aux(N,args,sums); // Here we need U/(1+S) = V/D(Q+T). var cl_LF result = @@ -225,55 +227,95 @@ const cl_LF compute_catalanconst_cvz2 (uintC len) args[n].q.~cl_I(); args[n].d.~cl_I(); } - return shorten(result,len); // verkürzen und fertig + return shorten(result,len); // verkürzen und fertig } // Bit complexity (N = len): O(log(N)^2*M(N)). -// Timings of the above algorithms, on an i486 33 MHz, running Linux. -// N ram ramfast exp1 exp2 cvz1 cvz2 -// 10 0.055 0.068 0.32 0.91 0.076 0.11 -// 25 0.17 0.26 0.95 3.78 0.23 0.43 -// 50 0.43 0.73 2.81 11.5 0.63 1.36 -// 100 1.32 2.24 8.82 34.1 1.90 4.48 -// 250 6.60 10.4 48.7 127.5 10.3 20.8 -// 500 24.0 30.9 186 329 38.4 58.6 -// 1000 83.0 89.0 944 860 149 163 -// 2500 446 352 6964 3096 1032 545 -// 5000 1547 899 -// asymp. N^2 FAST N^2 FAST N^2 FAST + +const cl_LF compute_catalanconst_lupas (uintC len) +{ + // [Alexandru Lupas. Formulae for Some Classical Constants. + // Proceedings of ROGER-2000. + // http://www.lacim.uqam.ca/~plouffe/articles/alupas1.pdf] + // [http://mathworld.wolfram.com/CatalansConstant.html] + // G = 19/18 * sum(n=0..infty, + // mul(m=1..n, -32*((80*m^3+72*m^2-18*m-19)*m^3)/ + // (10240*m^6+14336*m^5+2560*m^4-3072*m^3-888*m^2+72*m+27))). + struct rational_series_stream : cl_pq_series_stream { + cl_I n; + static cl_pq_series_term computenext (cl_pq_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var cl_I n = thiss.n; + var cl_pq_series_term result; + if (zerop(n)) { + result.p = 1; + result.q = 1; + } else { + // Compute -32*((80*n^3+72*n^2-18*n-19)*n^3) (using Horner scheme): + result.p = (19+(18+(-72-80*n)*n)*n)*n*n*n << 5; + // Compute 10240*n^6+14336*n^5+2560*n^4-3072*n^3-888*n^2+72*n+27: + result.q = 27+(72+(-888+(-3072+(2560+(14336+10240*n)*n)*n)*n)*n)*n; + } + thiss.n = plus1(n); + return result; + } + rational_series_stream () + : cl_pq_series_stream (rational_series_stream::computenext), + n (0) {} + } series; + var uintC actuallen = len + 2; // 2 guard digits + var uintC N = (intDsize/2)*actuallen; + var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); + var cl_LF g = fsum*cl_I_to_LF(19,actuallen)/cl_I_to_LF(18,actuallen); + return shorten(g,len); +} +// Bit complexity (N = len): O(log(N)^2*M(N)). + +// Timings of the above algorithms, on an AMD Opteron 1.7 GHz, running Linux/x86. +// N ram ramfast exp1 exp2 cvz1 cvz2 lupas +// 25 0.0011 0.0010 0.0094 0.0107 0.0021 0.0016 0.0034 +// 50 0.0030 0.0025 0.0280 0.0317 0.0058 0.0045 0.0095 +// 100 0.0087 0.0067 0.0854 0.0941 0.0176 0.0121 0.0260 +// 250 0.043 0.029 0.462 0.393 0.088 0.055 0.109 +// 500 0.15 0.086 1.7 1.156 0.323 0.162 0.315 +// 1000 0.57 0.25 7.5 3.23 1.27 0.487 0.864 +// 2500 3.24 1.10 52.2 12.4 8.04 2.02 3.33 +// 5000 13.1 3.06 218 32.7 42.1 5.59 8.80 +// 10000 52.7 8.2 910 85.3 216.7 15.3 22.7 +// 25000 342 29.7 6403 295 1643 54.5 77.3 +// 50000 89.9 139 195 +//100000 227 363 483 +// asymp. N^2 FAST N^2 FAST N^2 FAST FAST + // (FAST means O(log(N)^2*M(N))) // -// The "exp1" and "exp2" algorithms are always about 10 to 15 times slower -// than the "ram" and "ramfast" algorithms. -// The break-even point between "ram" and "ramfast" is at about N = 1410. +// The "ramfast" algorithm is always fastest. const cl_LF compute_catalanconst (uintC len) { - if (len >= 1410) - return compute_catalanconst_ramanujan_fast(len); - else - return compute_catalanconst_ramanujan(len); + return compute_catalanconst_ramanujan_fast(len); } // Bit complexity (N := len): O(log(N)^2*M(N)). const cl_LF catalanconst (uintC len) { - var uintC oldlen = TheLfloat(cl_LF_catalanconst)->len; // vorhandene Länge + var uintC oldlen = TheLfloat(cl_LF_catalanconst())->len; // vorhandene Länge if (len < oldlen) - return shorten(cl_LF_catalanconst,len); + return shorten(cl_LF_catalanconst(),len); if (len == oldlen) - return cl_LF_catalanconst; + return cl_LF_catalanconst(); - // TheLfloat(cl_LF_catalanconst)->len um mindestens einen konstanten Faktor - // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird: + // TheLfloat(cl_LF_catalanconst())->len um mindestens einen konstanten Faktor + // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird: var uintC newlen = len; oldlen += floor(oldlen,2); // oldlen * 3/2 if (newlen < oldlen) newlen = oldlen; - // gewünschte > vorhandene Länge -> muß nachberechnen: - cl_LF_catalanconst = compute_catalanconst(newlen); - return (len < newlen ? shorten(cl_LF_catalanconst,len) : cl_LF_catalanconst); + // gewünschte > vorhandene Länge -> muß nachberechnen: + cl_LF_catalanconst() = compute_catalanconst(newlen); + return (len < newlen ? shorten(cl_LF_catalanconst(),len) : cl_LF_catalanconst()); } } // namespace cln