src/float/transcendental/cl_LF_zeta_int.cc

   1 // zeta().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_F_tran.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cln/lfloat.h"
  13 #include "cl_LF_tran.h"
  14 #include "cl_LF.h"
  15 #include "cln/integer.h"
  16 #include "cln/exception.h"
  17 #include "cl_alloca.h"
  18
  19 namespace cln {
  20
  21 const cl_LF compute_zeta_exp (int s, uintC len)
  22 {
  23         // Method:
  24         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  25         // with convergence acceleration through exp(x), and evaluated
  26         // using the binary-splitting algorithm.
  27         var uintC actuallen = len+2; // 2 Schutz-Digits
  28         var uintC x = (uintC)(0.693148*intDsize*actuallen)+1;
  29         var uintC N = (uintC)(2.718281828*x);
  30         CL_ALLOCA_STACK;
  31         var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term));
  32         var uintC n;
  33         for (n = 0; n < N; n++) {
  34                 if (n==0) {
  35                         init1(cl_I, args[n].p) (1);
  36                         init1(cl_I, args[n].q) (1);
  37                 } else {
  38                         init1(cl_I, args[n].p) (x);
  39                         init1(cl_I, args[n].q) (n);
  40                 }
  41                 init1(cl_I, args[n].d) (evenp(n)
  42                                         ? expt_pos(n+1,s)
  43                                         : -expt_pos(n+1,s));
  44         }
  45         var cl_LF result = eval_pqd_series(N,args,actuallen);
  46         for (n = 0; n < N; n++) {
  47                 args[n].p.~cl_I();
  48                 args[n].q.~cl_I();
  49                 args[n].d.~cl_I();
  50         }
  51         result = shorten(result,len); // verkürzen und fertig
  52         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
  53         return scale_float(result,s-1) / (ash(1,s-1)-1);
  54 }
  55 // Bit complexity (N = len): O(log(N)^2*M(N)).
  56
  57 const cl_LF compute_zeta_cvz1 (int s, uintC len)
  58 {
  59         // Method:
  60         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  61         // with Cohen-Villegas-Zagier convergence acceleration.
  62         var uintC actuallen = len+2; // 2 Schutz-Digits
  63         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
  64         var cl_I fterm = 2*(cl_I)N*(cl_I)N;
  65         var cl_I fsum = fterm;
  66         var cl_LF gterm = cl_I_to_LF(fterm,actuallen);
  67         var cl_LF gsum = gterm;
  68         var uintC n;
  69         // After n loops
  70         //   fterm = (N+n)!N/(2n+2)!(N-n-1)!*2^(2n+2), fsum = ... + fterm,
  71         //   gterm = S_n*fterm, gsum = ... + gterm.
  72         for (n = 1; n < N; n++) {
  73                 fterm = exquopos(fterm*(2*(cl_I)(N-n)*(cl_I)(N+n)),(cl_I)(2*n+1)*(cl_I)(n+1));
  74                 fsum = fsum + fterm;
  75                 gterm = The(cl_LF)(gterm*(2*(cl_I)(N-n)*(cl_I)(N+n)))/((cl_I)(2*n+1)*(cl_I)(n+1));
  76                 if (evenp(n))
  77                         gterm = gterm + cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
  78                 else
  79                         gterm = gterm - cl_I_to_LF(fterm,actuallen)/expt_pos(n+1,s);
  80                 gsum = gsum + gterm;
  81         }
  82         var cl_LF result = gsum/cl_I_to_LF(1+fsum,actuallen);
  83         result = shorten(result,len); // verkürzen und fertig
  84         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
  85         return scale_float(result,s-1) / (ash(1,s-1)-1);
  86 }
  87 // Bit complexity (N = len): O(N^2).
  88
  89 const cl_LF compute_zeta_cvz2 (int s, uintC len)
  90 {
  91         // Method:
  92         // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s),
  93         // with Cohen-Villegas-Zagier convergence acceleration, and
  94         // evaluated using the binary splitting algorithm with truncation.
  95         var uintC actuallen = len+2; // 2 guard digits
  96         var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1;
  97         var uintC n;
  98         struct rational_series_stream : cl_pqd_series_stream {
  99                 uintC n;
 100                 int s;
 101                 uintC N;
 102                 static cl_pqd_series_term computenext (cl_pqd_series_stream& thisss)
 103                 {
 104                         var rational_series_stream& thiss = (rational_series_stream&)thisss;
 105                         var uintC n = thiss.n;
 106                         var uintC s = thiss.s;
 107                         var uintC N = thiss.N;
 108                         var cl_pqd_series_term result;
 109                         result.p = 2*(cl_I)(N-n)*(cl_I)(N+n);
 110                         result.q = (cl_I)(2*n+1)*(cl_I)(n+1);
 111                         result.d = evenp(n) ? expt_pos(n+1,s) : -expt_pos(n+1,s);
 112                         thiss.n = n+1;
 113                         return result;
 114                 }
 115                 rational_series_stream (int _s, uintC _N)
 116                         : cl_pqd_series_stream (rational_series_stream::computenext),
 117                           n (0), s (_s), N (_N) {}
 118         } series(s,N);
 119         var cl_pqd_series_result<cl_I> sums;
 120         eval_pqd_series_aux(N,series,sums,actuallen);
 121         // Here we need U/(1+S) = V/D(Q+T).
 122         var cl_LF result =
 123           cl_I_to_LF(sums.V,actuallen) / The(cl_LF)(sums.D * cl_I_to_LF(sums.Q+sums.T,actuallen));
 124         result = shorten(result,len); // verkürzen und fertig
 125         // Zum Schluss mit 2^(s-1)/(2^(s-1)-1) multiplizieren:
 126         return scale_float(result,s-1) / (ash(1,s-1)-1);
 127 }
 128 // Bit complexity (N = len): O(log(N)^2*M(N)).
 129
 130 // Timings of the above algorithm in seconds, on a P-4, 3GHz, running Linux.
 131 //    s                5                          15
 132 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
 133 //   125      0.60    0.04     0.06       1.88    0.04     0.20
 134 //   250      1.60    0.13     0.19       4.82    0.15     0.58
 135 //   500      4.3     0.48     0.60      12.2     0.55     1.67
 136 //  1000     11.0     1.87     1.63      31.7     2.11     4.60
 137 //  2000     28.0     7.4      4.23     111       8.2     11.3
 138 //  4000     70.2    30.6     10.6               50       44
 139 //  8000            142       26.8              169       75
 140 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
 141 //
 142 //    s               35                          75
 143 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
 144 //   125      4.70    0.05     0.53      11.3     0.07     1.35
 145 //   250     12.5     0.19     1.62      28.7     0.25     3.74
 146 //   500     31.3     0.69     4.40      70.2     0.96    10.2
 147 //  1000     88.8     2.70    11.4      191       3.76    25.4
 148 //  2000             10.9     28.9               15.6     64.3
 149 //  4000             46       73                 64.4    170
 150 //  8000            215      178                295      397
 151 // 16000            898      419               1290      972
 152 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
 153 //
 154 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
 155
 156 // Timings of the above algorithm, on an i486 33 MHz, running Linux.
 157 //    s                5                           15
 158 //    N     sum_exp sum_cvz1 sum_cvz2   sum_exp sum_cvz1 sum_cvz2
 159 //    10       2.04    0.09    0.17        8.0     0.11    0.49
 160 //    25       8.6     0.30    0.76       30.6     0.37    2.36
 161 //    50      25.1     0.92    2.49       91.1     1.15    7.9
 162 //   100               2.97    8.46                3.75   24.5
 163 //   250              16.7    36.5                21.7   108
 164 //   500              64.2   106                  85.3   295
 165 //  1000             263     285                 342     788
 166 // asymp.      FAST    N^2     FAST        FAST    N^2     FAST
 167 //
 168 // The break-even point between cvz1 and cvz2 seems to grow linearly with s.
 169
 170 const cl_LF zeta (int s, uintC len)
 171 {
 172         if (!(s > 1))
 173                 throw runtime_exception("zeta(s) with illegal s<2.");
 174         if (s==3)
 175                 return zeta3(len);
 176         if (len < 220*(uintC)s)
 177                 return compute_zeta_cvz1(s,len);
 178         else
 179                 return compute_zeta_cvz2(s,len);
 180 }
 181 // Bit complexity (N = len): O(log(N)^2*M(N)).
 182
 183 }  // namespace cln