1 // Computation of arctan(1/m) (m integer) to high precision.
3 #include "cl_integer.h"
4 #include "cl_rational.h"
8 #include "cl_LF_tran.h"
12 #include "cl_timing.h"
16 #define floor cln_floor
19 // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
20 // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
21 // a. Using long floats. [N^2]
22 // b. Simulating long floats using integers. [N^2]
23 // c. Using integers, no binary splitting. [N^2]
24 // d. Using integers, with binary splitting. [FAST]
25 // Method 3: general built-in algorithm. [FAST]
28 // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
30 const cl_LF atan_recip_1a (cl_I m, uintC len)
32 var uintC actuallen = len + 1;
33 var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
35 var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
36 var cl_LF fsum = fterm;
37 for (var uintL n = 1; fterm >= eps; n++) {
39 fterm = cl_LF_shortenwith(fterm,eps);
41 fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
43 fsum = fsum - LF_to_LF(fterm/(2*n+1),actuallen);
45 return shorten(fsum,len);
48 const cl_LF atan_recip_1b (cl_I m, uintC len)
50 var uintC actuallen = len + 1;
52 var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
53 var cl_I fsum = fterm;
54 for (var uintL n = 1; fterm > 0; n++) {
55 fterm = floor1(fterm,m2);
57 fsum = fsum + floor1(fterm,2*n+1);
59 fsum = fsum - floor1(fterm,2*n+1);
61 return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
64 const cl_LF atan_recip_1c (cl_I m, uintC len)
66 var uintC actuallen = len + 1;
68 var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
69 var cl_I num = 0, den = 1; // "lazy rational number"
70 for (sintL n = N-1; n>=0; n--) {
71 // Multiply sum with 1/m^2:
75 num = num*(2*n+1) + den;
77 num = num*(2*n+1) - den;
81 var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
82 return shorten(result,len);
85 const cl_LF atan_recip_1d (cl_I m, uintC len)
87 var uintC actuallen = len + 1;
89 var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
91 var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
92 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
94 for (n = 0; n < N; n++) {
95 new (&bv[n]) cl_I ((n % 2) == 0 ? (cl_I)(2*n+1) : -(cl_I)(2*n+1));
96 new (&qv[n]) cl_I (n==0 ? m : m2);
98 var cl_rational_series series;
99 series.av = NULL; series.bv = bv;
100 series.pv = NULL; series.qv = qv; series.qsv = NULL;
101 var cl_LF result = eval_rational_series(N,series,actuallen);
102 for (n = 0; n < N; n++) {
106 return shorten(result,len);
110 // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
112 const cl_LF atan_recip_2a (cl_I m, uintC len)
114 var uintC actuallen = len + 1;
115 var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
117 var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
118 var cl_LF fsum = fterm;
119 for (var uintL n = 1; fterm >= eps; n++) {
120 fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
121 fterm = cl_LF_shortenwith(fterm,eps);
122 fsum = fsum + LF_to_LF(fterm,actuallen);
124 return shorten(fsum,len);
127 const cl_LF atan_recip_2b (cl_I m, uintC len)
129 var uintC actuallen = len + 1;
131 var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
132 var cl_I fsum = fterm;
133 for (var uintL n = 1; fterm > 0; n++) {
134 fterm = floor1((2*n)*fterm,(2*n+1)*m2);
137 return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
140 const cl_LF atan_recip_2c (cl_I m, uintC len)
142 var uintC actuallen = len + 1;
144 var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
145 var cl_I num = 0, den = 1; // "lazy rational number"
146 for (uintL n = N; n>0; n--) {
147 // Multiply sum with (2n)/(2n+1)(m^2+1):
149 den = den * ((2*n+1)*m2);
155 var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
156 return shorten(result,len);
159 const cl_LF atan_recip_2d (cl_I m, uintC len)
161 var uintC actuallen = len + 1;
163 var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
165 var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
166 var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
168 new (&pv[0]) cl_I (m);
169 new (&qv[0]) cl_I (m2);
170 for (n = 1; n < N; n++) {
171 new (&pv[n]) cl_I (2*n);
172 new (&qv[n]) cl_I ((2*n+1)*m2);
174 var cl_rational_series series;
175 series.av = NULL; series.bv = NULL;
176 series.pv = pv; series.qv = qv; series.qsv = NULL;
177 var cl_LF result = eval_rational_series(N,series,actuallen);
178 for (n = 0; n < N; n++) {
182 return shorten(result,len);
186 // Main program: Compute and display the timings.
188 int main (int argc, char * argv[])
191 if ((argc >= 3) && !strcmp(argv[1],"-r")) {
192 repetitions = atoi(argv[2]);
193 argc -= 2; argv += 2;
197 cl_I m = (cl_I)argv[1];
198 uintL len = atoi(argv[2]);
200 ln(cl_I_to_LF(1000,len+10)); // fill cache
203 for (int rep = repetitions; rep > 0; rep--)
204 { p = atan_recip_1a(m,len); }
208 for (int rep = repetitions; rep > 0; rep--)
209 { p = atan_recip_1b(m,len); }
213 for (int rep = repetitions; rep > 0; rep--)
214 { p = atan_recip_1c(m,len); }
218 for (int rep = repetitions; rep > 0; rep--)
219 { p = atan_recip_1d(m,len); }
224 for (int rep = repetitions; rep > 0; rep--)
225 { p = atan_recip_2a(m,len); }
229 for (int rep = repetitions; rep > 0; rep--)
230 { p = atan_recip_2b(m,len); }
234 for (int rep = repetitions; rep > 0; rep--)
235 { p = atan_recip_2c(m,len); }
239 for (int rep = repetitions; rep > 0; rep--)
240 { p = atan_recip_2d(m,len); }
245 for (int rep = repetitions; rep > 0; rep--)
246 { p = The(cl_LF)(atan(cl_RA_to_LF(1/(cl_RA)m,len))); }
252 // Timings of the above algorithms, on an i486 33 MHz, running Linux.
253 // m = 390112. (For Jörg Arndt's formula (4.15).)
254 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
255 // 10 0.0027 0.0018 0.0019 0.0019 0.0032 0.0022 0.0019 0.0019 0.0042
256 // 25 0.0085 0.0061 0.0058 0.0061 0.0095 0.0069 0.0056 0.0061 0.028
257 // 50 0.024 0.018 0.017 0.017 0.026 0.020 0.016 0.017 0.149
258 // 100 0.075 0.061 0.057 0.054 0.079 0.065 0.052 0.052 0.71
259 // 250 0.41 0.33 0.32 0.26 0.42 0.36 0.28 0.24 3.66
260 // 500 1.57 1.31 1.22 0.88 1.57 1.36 1.10 0.83 13.7
261 // 1000 6.08 5.14 4.56 2.76 6.12 5.36 4.06 2.58 45.5
262 // 2500 36.5 32.2 25.8 10.2 38.4 33.6 22.2 9.1 191
265 // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST
267 // m = 319. (For Jörg Arndt's formula (4.7).)
268 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
269 // 1000 6.06 4.40 9.17 3.82 5.29 3.90 7.50 3.53 50.3
271 // m = 18. (For Jörg Arndt's formula (4.4).)
272 // N 1a 1b 1c 1d 2a 2b 2c 2d 3
273 // 1000 11.8 9.0 22.3 6.0 10.2 7.7 17.1 5.7 54.3