src/float/transcendental/cl_LF_zeta_int.cc

   1 // cl_zeta().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_lfloat.h"
  13 #include "cl_LF_tran.h"
  14 #include "cl_LF.h"
  15 #include "cl_integer.h"
  16 #include "cl_abort.h"
  17 #include "cl_alloca.h"
  18
  19 const cl_LF compute_zeta_exp (int s, uintC len)
  20 {
  21         // Method:
  22         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  23         // with convergence acceleration through exp(x), and evaluated
  24         // using the binary-splitting algorithm.
  25         var uintC actuallen = len+2; // 2 Schutz-Digits
  26         var uintL x = (uintL)(0.693148*intDsize*actuallen)+1;
  27         var uintL N = (uintL)(2.718281828*x);
  28         CL_ALLOCA_STACK;
  29         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
  30         var uintL n;
  31         for (n = 0; n < N; n++) {
  32                 if (n==0) {
  33                         init1(cl_I, args[n].p) (1);
  34                         init1(cl_I, args[n].q) (1);
  35                 } else {
  36                         init1(cl_I, args[n].p) (x);
  37                         init1(cl_I, args[n].q) (n);
  38                 }
  39                 init1(cl_I, args[n].d) (evenp(n)
  40                                         ? expt_pos(n+1,s)
  41                                         : -expt_pos(n+1,s));
  42         }
  43         var cl_LF result = eval_pqd_series(N,args,actuallen);
  44         for (n = 0; n < N; n++) {
  45                 args[n].p.~cl_I();
  46                 args[n].q.~cl_I();
  47                 args[n].d.~cl_I();
  48         }
  49         result = shorten(result,len); // verkürzen und fertig
  50         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
  51         return scale_float(result,s-1) / (ash(1,s-1)-1);
  52 }
  53 // Bit complexity (N = len): O(log(N)^2*M(N)).
  54
  55 const cl_LF compute_zeta_cvz1 (int s, uintC len)
  56 {
  57         // Method:
  58         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  59         // with Cohen-Villegas-Zagier convergence acceleration.
  60         var uintC actuallen = len+2; // 2 Schutz-Digits
  61         var uintL N = (uintL)(0.39321985*intDsize*actuallen)+1;
  62         var cl_I fterm = 2*(cl_I)N*(cl_I)N;
  63         var cl_I fsum = fterm;
  64         var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
  65         var cl_LF gsum = gterm;
  66         var uintL n;
  67         // After n loops
  68         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
  69         //   gterm = S_n*fterm, gsum = ... + gterm.
  70         for (n = 1; n < N; n++) {
  71                 fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
  72                 fsum = fsum + fterm;
  73                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
  74                 if (evenp(n))
  75                         gterm = gterm + cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
  76                 else
  77                         gterm = gterm - cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
  78                 gsum = gsum + gterm;
  79         }
  80         var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
  81         result = shorten(result,len); // verkürzen und fertig
  82         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
  83         return scale_float(result,s-1) / (ash(1,s-1)-1);
  84 }
  85 // Bit complexity (N = len): O(N^2).
  86
  87 const cl_LF compute_zeta_cvz2 (int s, uintC len)
  88 {
  89         // Method:
  90         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  91         // with Cohen-Villegas-Zagier convergence acceleration, and
  92         // evaluated using the binary splitting algorithm.
  93         var uintC actuallen = len+2; // 2 Schutz-Digits
  94         var uintL N = (uintL)(0.39321985*intDsize*actuallen)+1;
  95         CL_ALLOCA_STACK;
  96         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
  97         var uintL n;
  98         for (n = 0; n < N; n++) {
  99                 init1(cl_I, args[n].p) (2*(cl_I)(N-n)*(cl_I)(N+n));
 100                 init1(cl_I, args[n].q) ((cl_I)(2*n+1)*(cl_I)(n+1));
 101                 init1(cl_I, args[n].d) (evenp(n)
 102                                         ? expt_pos(n+1,s)
 103                                         : -expt_pos(n+1,s));
 104         }
 105         var cl_pqd_series_result sums;
 106         eval_pqd_series_aux(N,args,sums);
 107         // Here we need U/(1+S) = V/D(Q+T).
 108         var cl_LF result =
 109           cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
 110         for (n = 0; n < N; n++) {
 111                 args[n].p.~cl_I();
 112                 args[n].q.~cl_I();
 113                 args[n].d.~cl_I();
 114         }
 115         result = shorten(result,len); // verkürzen und fertig
 116         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
 117         return scale_float(result,s-1) / (ash(1,s-1)-1);
 118 }
 119 // Bit complexity (N = len): O(log(N)^2*M(N)).
 120
 121 // Timings of the above algorithm, on an i486 33 MHz, running Linux.
 122 //    s                5                           15
 123 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
 124 //    10       2.04    0.09    0.17        8.0     0.11    0.49
 125 //    25       8.6     0.30    0.76       30.6     0.37    2.36
 126 //    50      25.1     0.92    2.49       91.1     1.15    7.9
 127 //   100               2.97    8.46                3.75   24.5
 128 //   250              16.7    36.5                21.7   108
 129 //   500              64.2   106                  85.3   295
 130 //  1000             263     285                 342     788
 131 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
 132 //
 133 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
 134
 135 const cl_LF cl_zeta (int s, uintC len)
 136 {
 137         if (!(s > 1))
 138                 cl_abort();
 139         if (len < 280*(uintL)s)
 140                 return compute_zeta_cvz1(s,len);
 141         else
 142                 return compute_zeta_cvz2(s,len);
 143 }
 144 // Bit complexity (N = len): O(log(N)^2*M(N)).