1 // Computation of arctan(1/m) (m integer) to high precision.
3 #include "cln/integer.h"
4 #include "cln/rational.h"
6 #include "cln/lfloat.h"
8 #include "cl_LF_tran.h"
12 #include "cln/timing.h"
16 #define floor cln_floor
20 // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
21 // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
22 // a. Using long floats. [N^2]
23 // b. Simulating long floats using integers. [N^2]
24 // c. Using integers, no binary splitting. [N^2]
25 // d. Using integers, with binary splitting. [FAST]
26 // Method 3: general built-in algorithm. [FAST]
29 // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
31 const cl_LF atan_recip_1a (cl_I m, uintC len)
33 var uintC actuallen = len + 1;
34 var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintC)actuallen);
36 var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
37 var cl_LF fsum = fterm;
38 for (var uintC n = 1; fterm >= eps; n++) {
40 fterm = cl_LF_shortenwith(fterm,eps);
42 fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
44 fsum = fsum - LF_to_LF(fterm/(2*n+1),actuallen);
46 return shorten(fsum,len);
49 const cl_LF atan_recip_1b (cl_I m, uintC len)
51 var uintC actuallen = len + 1;
53 var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
54 var cl_I fsum = fterm;
55 for (var uintC n = 1; fterm > 0; n++) {
56 fterm = floor1(fterm,m2);
58 fsum = fsum + floor1(fterm,2*n+1);
60 fsum = fsum - floor1(fterm,2*n+1);
62 return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintC)actuallen);
65 const cl_LF atan_recip_1c (cl_I m, uintC len)
67 var uintC actuallen = len + 1;
69 var sintC N = (sintC)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
70 var cl_I num = 0, den = 1; // "lazy rational number"
71 for (sintC n = N-1; n>=0; n--) {
72 // Multiply sum with 1/m^2:
76 num = num*(2*n+1) + den;
78 num = num*(2*n+1) - den;
82 var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
83 return shorten(result,len);
86 const cl_LF atan_recip_1d (cl_I m, uintC len)
88 var uintC actuallen = len + 1;
90 var uintC N = (uintC)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
92 var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
93 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
95 for (n = 0; n < N; n++) {
96 new (&bv[n]) cl_I ((n % 2) == 0 ? (cl_I)(2*n+1) : -(cl_I)(2*n+1));
97 new (&qv[n]) cl_I (n==0 ? m : m2);
99 var cl_rational_series series;
100 series.av = NULL; series.bv = bv;
101 series.pv = NULL; series.qv = qv; series.qsv = NULL;
102 var cl_LF result = eval_rational_series(N,series,actuallen);
103 for (n = 0; n < N; n++) {
107 return shorten(result,len);
111 // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
113 const cl_LF atan_recip_2a (cl_I m, uintC len)
115 var uintC actuallen = len + 1;
116 var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintC)actuallen);
118 var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
119 var cl_LF fsum = fterm;
120 for (var uintC n = 1; fterm >= eps; n++) {
121 fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
122 fterm = cl_LF_shortenwith(fterm,eps);
123 fsum = fsum + LF_to_LF(fterm,actuallen);
125 return shorten(fsum,len);
128 const cl_LF atan_recip_2b (cl_I m, uintC len)
130 var uintC actuallen = len + 1;
132 var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
133 var cl_I fsum = fterm;
134 for (var uintC n = 1; fterm > 0; n++) {
135 fterm = floor1((2*n)*fterm,(2*n+1)*m2);
138 return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintC)actuallen);
141 const cl_LF atan_recip_2c (cl_I m, uintC len)
143 var uintC actuallen = len + 1;
145 var uintC N = (uintC)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
146 var cl_I num = 0, den = 1; // "lazy rational number"
147 for (uintC n = N; n>0; n--) {
148 // Multiply sum with (2n)/(2n+1)(m^2+1):
150 den = den * ((2*n+1)*m2);
156 var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
157 return shorten(result,len);
160 const cl_LF atan_recip_2d (cl_I m, uintC len)
162 var uintC actuallen = len + 1;
164 var uintC N = (uintC)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
166 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
167 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
169 new (&pv[0]) cl_I (m);
170 new (&qv[0]) cl_I (m2);
171 for (n = 1; n < N; n++) {
172 new (&pv[n]) cl_I (2*n);
173 new (&qv[n]) cl_I ((2*n+1)*m2);
175 var cl_rational_series series;
176 series.av = NULL; series.bv = NULL;
177 series.pv = pv; series.qv = qv; series.qsv = NULL;
178 var cl_LF result = eval_rational_series(N,series,actuallen);
179 for (n = 0; n < N; n++) {
183 return shorten(result,len);
187 // Main program: Compute and display the timings.
189 int main (int argc, char * argv[])
192 if ((argc >= 3) && !strcmp(argv[1],"-r")) {
193 repetitions = atoi(argv[2]);
194 argc -= 2; argv += 2;
198 cl_I m = (cl_I)argv[1];
199 uintC len = atol(argv[2]);
201 ln(cl_I_to_LF(1000,len+10)); // fill cache
204 for (int rep = repetitions; rep > 0; rep--)
205 { p = atan_recip_1a(m,len); }
209 for (int rep = repetitions; rep > 0; rep--)
210 { p = atan_recip_1b(m,len); }
214 for (int rep = repetitions; rep > 0; rep--)
215 { p = atan_recip_1c(m,len); }
219 for (int rep = repetitions; rep > 0; rep--)
220 { p = atan_recip_1d(m,len); }
225 for (int rep = repetitions; rep > 0; rep--)
226 { p = atan_recip_2a(m,len); }
230 for (int rep = repetitions; rep > 0; rep--)
231 { p = atan_recip_2b(m,len); }
235 for (int rep = repetitions; rep > 0; rep--)
236 { p = atan_recip_2c(m,len); }
240 for (int rep = repetitions; rep > 0; rep--)
241 { p = atan_recip_2d(m,len); }
246 for (int rep = repetitions; rep > 0; rep--)
247 { p = The(cl_LF)(atan(cl_RA_to_LF(1/(cl_RA)m,len))); }
253 // Timings of the above algorithms, on an i486 33 MHz, running Linux.
254 // m = 390112. (For Jörg Arndt's formula (4.15).)
255 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
256 // 10 0.0027 0.0018 0.0019 0.0019 0.0032 0.0022 0.0019 0.0019 0.0042
257 // 25 0.0085 0.0061 0.0058 0.0061 0.0095 0.0069 0.0056 0.0061 0.028
258 // 50 0.024 0.018 0.017 0.017 0.026 0.020 0.016 0.017 0.149
259 // 100 0.075 0.061 0.057 0.054 0.079 0.065 0.052 0.052 0.71
260 // 250 0.41 0.33 0.32 0.26 0.42 0.36 0.28 0.24 3.66
261 // 500 1.57 1.31 1.22 0.88 1.57 1.36 1.10 0.83 13.7
262 // 1000 6.08 5.14 4.56 2.76 6.12 5.36 4.06 2.58 45.5
263 // 2500 36.5 32.2 25.8 10.2 38.4 33.6 22.2 9.1 191
266 // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST
268 // m = 319. (For Jörg Arndt's formula (4.7).)
269 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
270 // 1000 6.06 4.40 9.17 3.82 5.29 3.90 7.50 3.53 50.3
272 // m = 18. (For Jörg Arndt's formula (4.4).)
273 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
274 // 1000 11.8 9.0 22.3 6.0 10.2 7.7 17.1 5.7 54.3