src/float/transcendental/cl_LF_catalanconst.cc

   1 // catalanconst().
   2
   3 // General includes.
   4 #include "base/cl_sysdep.h"
   5
   6 // Specification.
   7 #include "float/transcendental/cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/lfloat.h"
  13 #include "float/transcendental/cl_LF_tran.h"
  14 #include "float/lfloat/cl_LF.h"
  15 #include "cln/integer.h"
  16 #include "base/cl_alloca.h"
  17
  18 namespace cln {
  19
  20 const cl_LF compute_catalanconst_ramanujan (uintC len)
  21 {
  22         // [Jonathan M. Borwein, Peter B. Borwein: Pi and the AGM.
  23         //  Wiley 1987. Section 11.3, exercise 16 g, p. 386]
  24         // G = 3/8 * sum(n=0..infty, n!^2 / (2n+1)!*(2n+1))
  25         //     + pi/8 * log(2+sqrt(3)).
  26         // Every summand gives 0.6 new decimal digits in precision.
  27         // The sum is best evaluated using fixed-point arithmetic,
  28         // so that the precision is reduced for the later summands.
  29         var uintC actuallen = len + 2; // 2 guard digits
  30         var sintC scale = intDsize*actuallen;
  31         var cl_I sum = 0;
  32         var cl_I n = 0;
  33         var cl_I factor = ash(1,scale);
  34         while (!zerop(factor)) {
  35                 sum = sum + truncate1(factor,2*n+1);
  36                 n = n+1;
  37                 factor = truncate1(factor*n,2*(2*n+1));
  38         }
  39         var cl_LF fsum = scale_float(cl_I_to_LF(sum,actuallen),-scale);
  40         var cl_LF g =
  41           scale_float(The(cl_LF)(3*fsum)
  42                       + The(cl_LF)(pi(actuallen))
  43                         * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))),
  44                       -3);
  45         return shorten(g,len); // verkürzen und fertig
  46 }
  47 // Bit complexity (N := len): O(N^2).
  48
  49 const cl_LF compute_catalanconst_ramanujan_fast (uintC len)
  50 {
  51         // Same formula as above, using a binary splitting evaluation.
  52         // See [Borwein, Borwein, section 10.2.3].
  53         struct rational_series_stream : cl_pqb_series_stream {
  54                 cl_I n;
  55                 static cl_pqb_series_term computenext (cl_pqb_series_stream& thisss)
  56                 {
  57                         var rational_series_stream& thiss = (rational_series_stream&)thisss;
  58                         var cl_I n = thiss.n;
  59                         var cl_pqb_series_term result;
  60                         if (n==0) {
  61                                 result.p = 1;
  62                                 result.q = 1;
  63                                 result.b = 1;
  64                         } else {
  65                                 result.p = n;
  66                                 result.b = 2*n+1;
  67                                 result.q = result.b << 1; // 2*(2*n+1)
  68                         }
  69                         thiss.n = n+1;
  70                         return result;
  71                 }
  72                 rational_series_stream ()
  73                         : cl_pqb_series_stream (rational_series_stream::computenext),
  74                           n (0) {}
  75         } series;
  76         var uintC actuallen = len + 2; // 2 guard digits
  77         // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n)))
  78         // with appropriate N, and
  79         //   a(n) = 1, b(n) = 2*n+1,
  80         //   p(n) = n for n>0, q(n) = 2*(2*n+1) for n>0.
  81         var uintC N = (intDsize/2)*actuallen;
  82         // 4^-N <= 2^(-intDsize*actuallen).
  83         var cl_LF fsum = eval_rational_series<false>(N,series,actuallen,actuallen);
  84         var cl_LF g =
  85           scale_float(The(cl_LF)(3*fsum)
  86                       + The(cl_LF)(pi(actuallen))
  87                         * ln(cl_I_to_LF(2,actuallen)+sqrt(cl_I_to_LF(3,actuallen))),
  88                       -3);
  89         return shorten(g,len); // verkürzen und fertig
  90 }
  91 // Bit complexity (N := len): O(log(N)^2*M(N)).
  92
  93 const cl_LF compute_catalanconst_expintegral1 (uintC len)
  94 {
  95         // We compute f(x) classically and g(x) using the partial sums of f(x).
  96         var uintC actuallen = len+2; // 2 guard digits
  97         var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
  98         var uintC N = (uintC)(2.718281828*x);
  99         var cl_LF fterm = cl_I_to_LF(1,actuallen);
 100         var cl_LF fsum = fterm;
 101         var cl_LF gterm = fterm;
 102         var cl_LF gsum = gterm;
 103         var uintC n;
 104         // After n loops
 105         //   fterm = x^n/n!, fsum = 1 + x/1! + ... + x^n/n!,
 106         //   gterm = S_n*x^n/n!, gsum = S_0*x^0/0! + ... + S_n*x^n/n!.
 107         for (n = 1; n < N; n++) {
 108                 fterm = The(cl_LF)(fterm*x)/n;
 109                 fsum = fsum + fterm;
 110                 gterm = The(cl_LF)(gterm*x)/n;
 111                 if (evenp(n))
 112                         gterm = gterm + fterm/square((cl_I)(2*n+1));
 113                 else
 114                         gterm = gterm - fterm/square((cl_I)(2*n+1));
 115                 gsum = gsum + gterm;
 116         }
 117         var cl_LF result = gsum/fsum;
 118         return shorten(result,len); // verkürzen und fertig
 119 }
 120 // Bit complexity (N = len): O(N^2).
 121
 122 // Same algorithm as expintegral1, but using binary splitting to evaluate
 123 // the sums.
 124 const cl_LF compute_catalanconst_expintegral2 (uintC len)
 125 {
 126         var uintC actuallen = len+2; // 2 guard digits
 127         var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
 128         var uintC N = (uintC)(2.718281828*x);
 129         CL_ALLOCA_STACK;
 130         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
 131         var uintC n;
 132         for (n = 0; n < N; n++) {
 133                 if (n==0) {
 134                         init1(cl_I, args[n].p) (1);
 135                         init1(cl_I, args[n].q) (1);
 136                 } else {
 137                         init1(cl_I, args[n].p) (x);
 138                         init1(cl_I, args[n].q) (n);
 139                 }
 140                 init1(cl_I, args[n].d) (evenp(n)
 141                                         ? square((cl_I)(2*n+1))
 142                                         : -square((cl_I)(2*n+1)));
 143         }
 144         var cl_LF result = eval_pqd_series(N,args,actuallen);
 145         for (n = 0; n < N; n++) {
 146                 args[n].p.~cl_I();
 147                 args[n].q.~cl_I();
 148                 args[n].d.~cl_I();
 149         }
 150         return shorten(result,len); // verkürzen und fertig
 151 }
 152 // Bit complexity (N = len): O(log(N)^2*M(N)).
 153
 154 // Using Cohen-Villegas-Zagier acceleration, but without binary splitting.
 155 const cl_LF compute_catalanconst_cvz1 (uintC len)
 156 {
 157         var uintC actuallen = len+2; // 2 guard digits
 158         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
 159 #if 0
 160         var cl_LF fterm = cl_I_to_LF(2*(cl_I)N*(cl_I)N,actuallen);
 161         var cl_LF fsum = fterm;
 162         var cl_LF gterm = fterm;
 163         var cl_LF gsum = gterm;
 164         var uintC n;
 165         // After n loops
 166         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
 167         //   gterm = S_n*fterm, gsum = ... + gterm.
 168         for (n = 1; n < N; n++) {
 169                 fterm = The(cl_LF)(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
 170                 fsum = fsum + fterm;
 171                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
 172                 if (evenp(n))
 173                         gterm = gterm + fterm/square((cl_I)(2*n+1));
 174                 else
 175                         gterm = gterm - fterm/square((cl_I)(2*n+1));
 176                 gsum = gsum + gterm;
 177         }
 178         var cl_LF result = gsum/(cl_I_to_LF(1,actuallen)+fsum);
 179 #else
 180         // Take advantage of the fact that fterm and fsum are integers.
 181         var cl_I fterm = 2*(cl_I)N*(cl_I)N;
 182         var cl_I fsum = fterm;
 183         var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
 184         var cl_LF gsum = gterm;
 185         var uintC n;
 186         // After n loops
 187         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
 188         //   gterm = S_n*fterm, gsum = ... + gterm.
 189         for (n = 1; n < N; n++) {
 190                 fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
 191                 fsum = fsum + fterm;
 192                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
 193                 if (evenp(n))
 194                         gterm = gterm + cl_I_to_LF(fterm,actuallen)/square((cl_I)(2*n+1));
 195                 else
 196                         gterm = gterm - cl_I_to_LF(fterm,actuallen)/square((cl_I)(2*n+1));
 197                 gsum = gsum + gterm;
 198         }
 199         var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
 200 #endif
 201         return shorten(result,len); // verkürzen und fertig
 202 }
 203 // Bit complexity (N = len): O(N^2).
 204
 205 // Using Cohen-Villegas-Zagier acceleration, with binary splitting.
 206 const cl_LF compute_catalanconst_cvz2 (uintC len)
 207 {
 208         var uintC actuallen = len+2; // 2 guard digits
 209         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
 210         CL_ALLOCA_STACK;
 211         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
 212         var uintC n;
 213         for (n = 0; n < N; n++) {
 214                 init1(cl_I, args[n].p) (2*(cl_I)(N-n)*(cl_I)(N+n));
 215                 init1(cl_I, args[n].q) ((cl_I)(2*n+1)*(cl_I)(n+1));
 216                 init1(cl_I, args[n].d) (evenp(n)
 217                                         ? square((cl_I)(2*n+1))
 218                                         : -square((cl_I)(2*n+1)));
 219         }
 220         var cl_pqd_series_result<cl_I> sums;
 221         eval_pqd_series_aux(N,args,sums);
 222         // Here we need U/(1+S) = V/D(Q+T).
 223         var cl_LF result =
 224           cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
 225         for (n = 0; n < N; n++) {
 226                 args[n].p.~cl_I();
 227                 args[n].q.~cl_I();
 228                 args[n].d.~cl_I();
 229         }
 230         return shorten(result,len); // verkürzen und fertig
 231 }
 232 // Bit complexity (N = len): O(log(N)^2*M(N)).
 233
 234
 235 const cl_LF compute_catalanconst_lupas (uintC len)
 236 {
 237         // [Alexandru Lupas. Formulae for Some Classical Constants.
 238         //  Proceedings of ROGER-2000.
 239         //  http://www.lacim.uqam.ca/~plouffe/articles/alupas1.pdf]
 240         // [http://mathworld.wolfram.com/CatalansConstant.html]
 241         // G = 19/18 * sum(n=0..infty,
 242         //                 mul(m=1..n, -32*((80*m^3+72*m^2-18*m-19)*m^3)/
 243         //                             (10240*m^6+14336*m^5+2560*m^4-3072*m^3-888*m^2+72*m+27))).
 244         struct rational_series_stream : cl_pq_series_stream {
 245                 cl_I n;
 246                 static cl_pq_series_term computenext (cl_pq_series_stream& thisss)
 247                 {
 248                         var rational_series_stream& thiss = (rational_series_stream&)thisss;
 249                         var cl_I n = thiss.n;
 250                         var cl_pq_series_term result;
 251                         if (zerop(n)) {
 252                                 result.p = 1;
 253                                 result.q = 1;
 254                         } else {
 255                                 // Compute -32*((80*n^3+72*n^2-18*n-19)*n^3) (using Horner scheme):
 256                                 result.p = (19+(18+(-72-80*n)*n)*n)*n*n*n << 5;
 257                                 // Compute 10240*n^6+14336*n^5+2560*n^4-3072*n^3-888*n^2+72*n+27:
 258                                 result.q = 27+(72+(-888+(-3072+(2560+(14336+10240*n)*n)*n)*n)*n)*n;
 259                         }
 260                         thiss.n = plus1(n);
 261                         return result;
 262                 }
 263                 rational_series_stream ()
 264                         : cl_pq_series_stream (rational_series_stream::computenext),
 265                           n (0) {}
 266         } series;
 267         var uintC actuallen = len + 2; // 2 guard digits
 268         var uintC N = (intDsize/2)*actuallen;
 269         var cl_LF fsum = eval_rational_series<false>(N,series,actuallen,actuallen);
 270         var cl_LF g = fsum*cl_I_to_LF(19,actuallen)/cl_I_to_LF(18,actuallen);
 271         return shorten(g,len);
 272 }
 273 // Bit complexity (N = len): O(log(N)^2*M(N)).
 274
 275 // Timings of the above algorithms, on an AMD Opteron 1.7 GHz, running Linux/x86.
 276 //    N      ram    ramfast  exp1    exp2    cvz1    cvz2    lupas
 277 //    25     0.0011  0.0010  0.0094  0.0107  0.0021  0.0016  0.0034
 278 //    50     0.0030  0.0025  0.0280  0.0317  0.0058  0.0045  0.0095
 279 //   100     0.0087  0.0067  0.0854  0.0941  0.0176  0.0121  0.0260
 280 //   250     0.043   0.029   0.462   0.393   0.088   0.055   0.109
 281 //   500     0.15    0.086   1.7     1.156   0.323   0.162   0.315
 282 //  1000     0.57    0.25    7.5     3.23    1.27    0.487   0.864
 283 //  2500     3.24    1.10   52.2    12.4     8.04    2.02    3.33
 284 //  5000    13.1     3.06  218      32.7    42.1     5.59    8.80
 285 // 10000    52.7     8.2   910      85.3   216.7    15.3    22.7
 286 // 25000   342      29.7  6403     295    1643      54.5    77.3
 287 // 50000            89.9                           139     195
 288 //100000           227                             363     483
 289 // asymp.    N^2     FAST    N^2     FAST    N^2     FAST    FAST
 290
 291 // (FAST means O(log(N)^2*M(N)))
 292 //
 293 // The "ramfast" algorithm is always fastest.
 294
 295 const cl_LF compute_catalanconst (uintC len)
 296 {
 297         return compute_catalanconst_ramanujan_fast(len);
 298 }
 299 // Bit complexity (N := len): O(log(N)^2*M(N)).
 300
 301 const cl_LF catalanconst (uintC len)
 302 {
 303         var uintC oldlen = TheLfloat(cl_LF_catalanconst())->len; // vorhandene Länge
 304         if (len < oldlen)
 305                 return shorten(cl_LF_catalanconst(),len);
 306         if (len == oldlen)
 307                 return cl_LF_catalanconst();
 308
 309         // TheLfloat(cl_LF_catalanconst())->len um mindestens einen konstanten Faktor
 310         // > 1 wachsen lassen, damit es nicht zu häufig nachberechnet wird:
 311         var uintC newlen = len;
 312         oldlen += floor(oldlen,2); // oldlen * 3/2
 313         if (newlen < oldlen)
 314                 newlen = oldlen;
 315
 316         // gewünschte > vorhandene Länge -> muß nachberechnen:
 317         cl_LF_catalanconst() = compute_catalanconst(newlen);
 318         return (len < newlen ? shorten(cl_LF_catalanconst(),len) : cl_LF_catalanconst());
 319 }
 320
 321 }  // namespace cln