X-Git-Url: https://ginac.de/CLN/cln.git//cln.git?a=blobdiff_plain;f=src%2Fbase%2Fdigitseq%2Fcl_DS_mul_fftr.h;h=d99da53837a7f1d4a5828ee2434898048b71b0d1;hb=22549ef70fee95faab1e9f2adaf710ba9e0bdabf;hp=cc1f865ba9bbe47e37177a710e78c1612387aa3d;hpb=dd9e0f894eec7e2a8cf85078330ddc0a6639090b;p=cln.git diff --git a/src/base/digitseq/cl_DS_mul_fftr.h b/src/base/digitseq/cl_DS_mul_fftr.h index cc1f865..d99da53 100644 --- a/src/base/digitseq/cl_DS_mul_fftr.h +++ b/src/base/digitseq/cl_DS_mul_fftr.h @@ -196,9 +196,9 @@ #endif -#include "cl_floatparam.h" -#include "cl_io.h" -#include "cl_abort.h" +#include "cln/floatparam.h" +#include "cln/exception.h" + #if defined(HAVE_LONGDOUBLE) && (long_double_mant_bits > double_mant_bits) && (defined(__i386__) || defined(__m68k__) || (defined(__sparc__) && 0)) // Only these CPUs have fast "long double"s in hardware. @@ -397,10 +397,10 @@ static inline void mul (const fftr_complex& a, const fftr_complex& b, fftr_compl r.im = r_im; } -static uintL shuffle (uintL n, uintL x) +static uintC shuffle (uintL n, uintC x) { - var uintL y = 0; - var sintL v = 1; + var uintC y = 0; + var sintC v = 1; // Invariant: y + v*shuffle(n,x). do { if (x & 1) @@ -412,30 +412,30 @@ static uintL shuffle (uintL n, uintL x) } x >>= 1; } while (!(--n == 0)); - cl_abort(); + throw runtime_exception(); } #if 0 // unused -static uintL invshuffle (uintL n, uintL x) +static uintC invshuffle (uintL n, uintC x) { - var uintL y = 0; - var uintL v = 1; + var uintC y = 0; + var uintC v = 1; // Invariant: y + v*invshuffle(n,x). do { - if (x == ((uintL)1 << (n-1))) + if (x == ((uintC)1 << (n-1))) return y + v; - else if (x > ((uintL)1 << (n-1))) { - x = ((uintL)1 << n) - x; + else if (x > ((uintC)1 << (n-1))) { + x = ((uintC)1 << n) - x; y = y+v; } v <<= 1; } while (!(--n == 0)); - cl_abort(); + throw runtime_exception(); } #endif // Compute a real convolution using FFT: z[0..N-1] := x[0..N-1] * y[0..N-1]. -static void fftr_convolution (const uintL n, const uintL N, // N = 2^n +static void fftr_convolution (const uintL n, const uintC N, // N = 2^n fftr_real * x, // N numbers fftr_real * y, // N numbers fftr_real * z // N numbers result @@ -443,7 +443,7 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n { CL_ALLOCA_STACK; var fftr_cosinus* const w = cl_alloc_array(fftr_cosinus,N>>2); - var uintL i; + var uintC i; // Initialize w[exp].c to exp(2 pi i exp/N). w[0].c = fftr_roots_of_1[0]; { @@ -468,10 +468,10 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n var uintL l; for (l = n-1; l > 0; l--) { /* s = 0 */ { - var const uintL tmax = (uintL)1 << l; - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = t; - var uintL i2 = i1 + tmax; + var const uintC tmax = (uintC)1 << l; + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = t; + var uintC i2 = i1 + tmax; // replace (x[i1],x[i2]) by // (x[i1] + x[i2], x[i1] - x[i2]) var fftr_real tmp; @@ -480,15 +480,15 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n x[i1] = x[i1] + tmp; } } - var const uintL smax = (uintL)1 << (n-1-l); - var const uintL tmax = (uintL)1 << (l-1); - for (var uintL s = 1; s < smax; s++) { - var uintL exp = shuffle(n-1-l,s) << (l-1); - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = (s << (l+1)) + t; - var uintL i2 = i1 + tmax; - var uintL i3 = i2 + tmax; - var uintL i4 = i3 + tmax; + var const uintC smax = (uintC)1 << (n-1-l); + var const uintC tmax = (uintC)1 << (l-1); + for (var uintC s = 1; s < smax; s++) { + var uintC exp = shuffle(n-1-l,s) << (l-1); + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = (s << (l+1)) + t; + var uintC i2 = i1 + tmax; + var uintC i3 = i2 + tmax; + var uintC i4 = i3 + tmax; // replace (x[i1],x[i2],x[i3],x[i4]) by // (x[i1]-x[i3] - x[i4]*gam, // x[i3]*gam + x[i2]+x[i4]*(gam^2-1), @@ -522,10 +522,10 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n var uintL l; for (l = n-1; l > 0; l--) { /* s = 0 */ { - var const uintL tmax = (uintL)1 << l; - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = t; - var uintL i2 = i1 + tmax; + var const uintC tmax = (uintC)1 << l; + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = t; + var uintC i2 = i1 + tmax; // replace (y[i1],y[i2]) by // (y[i1] + y[i2], y[i1] - y[i2]) var fftr_real tmp; @@ -534,15 +534,15 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n y[i1] = y[i1] + tmp; } } - var const uintL smax = (uintL)1 << (n-1-l); - var const uintL tmax = (uintL)1 << (l-1); - for (var uintL s = 1; s < smax; s++) { - var uintL exp = shuffle(n-1-l,s) << (l-1); - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = (s << (l+1)) + t; - var uintL i2 = i1 + tmax; - var uintL i3 = i2 + tmax; - var uintL i4 = i3 + tmax; + var const uintC smax = (uintC)1 << (n-1-l); + var const uintC tmax = (uintC)1 << (l-1); + for (var uintC s = 1; s < smax; s++) { + var uintC exp = shuffle(n-1-l,s) << (l-1); + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = (s << (l+1)) + t; + var uintC i2 = i1 + tmax; + var uintC i3 = i2 + tmax; + var uintC i4 = i3 + tmax; // replace (y[i1],y[i2],y[i3],y[i4]) by // (y[i1]-y[i3] - y[i4]*gam, // y[i3]*gam + y[i2]+y[i4]*(gam^2-1), @@ -578,11 +578,11 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n // Multiplication mod (z+1). z[1] = x[1] * y[1]; #if 0 // This needs w[1..2^(n-1)-1]. - var const uintL smax = (uintL)1 << (n-1); - for (var uintL s = 1; s < smax; s++) { + var const uintC smax = (uintC)1 << (n-1); + for (var uintC s = 1; s < smax; s++) { // Multiplication mod (z^2 - 2 cos(2 pi j/2^n) z + 1) // with j = shuffle(n-1,s). - var uintL exp = shuffle(n-1,s); + var uintC exp = shuffle(n-1,s); var fftr_real tmp0 = x[2*s] * y[2*s]; var fftr_real tmp1 = x[2*s] * y[2*s+1] + x[2*s+1] * y[2*s]; var fftr_real tmp2 = x[2*s+1] * y[2*s+1]; @@ -598,9 +598,9 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n z[2] = tmp0 - tmp2; z[3] = tmp1; } - var const uintL smax = (uintL)1 << (n-2); - for (var uintL s = 1; s < smax; s++) { - var uintL exp = shuffle(n-2,s); + var const uintC smax = (uintC)1 << (n-2); + for (var uintC s = 1; s < smax; s++) { + var uintC exp = shuffle(n-2,s); // Multiplication mod (z^2 - 2 cos(2 pi j/2^n) z + 1) // with j = shuffle(n-1,2*s) = shuffle(n-2,s). var fftr_real gam = w[exp].gam.d; @@ -635,10 +635,10 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n } for (l = 1; l < n; l++) { /* s = 0 */ { - var const uintL tmax = (uintL)1 << l; - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = t; - var uintL i2 = i1 + tmax; + var const uintC tmax = (uintC)1 << l; + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = t; + var uintC i2 = i1 + tmax; // replace (z[i1],z[i2]) by // ((z[i1]+z[i2])/2, (z[i1]-z[i2])/2) // Do the division by 2 later. @@ -648,15 +648,15 @@ static void fftr_convolution (const uintL n, const uintL N, // N = 2^n z[i1] = z[i1] + tmp; } } - var const uintL smax = (uintL)1 << (n-1-l); - var const uintL tmax = (uintL)1 << (l-1); - for (var uintL s = 1; s < smax; s++) { - var uintL exp = shuffle(n-1-l,s) << (l-1); - for (var uintL t = 0; t < tmax; t++) { - var uintL i1 = (s << (l+1)) + t; - var uintL i2 = i1 + tmax; - var uintL i3 = i2 + tmax; - var uintL i4 = i3 + tmax; + var const uintC smax = (uintC)1 << (n-1-l); + var const uintC tmax = (uintC)1 << (l-1); + for (var uintC s = 1; s < smax; s++) { + var uintC exp = shuffle(n-1-l,s) << (l-1); + for (var uintC t = 0; t < tmax; t++) { + var uintC i1 = (s << (l+1)) + t; + var uintC i2 = i1 + tmax; + var uintC i3 = i2 + tmax; + var uintC i4 = i3 + tmax; // replace (z[i1],z[i2],z[i3],z[i4]) by // ((z[i1]+z[i3]+(z[i2]-z[i4])/gam)/2, // (z[i2]+z[i4]-(z[i3]-z[i1])/gam*(gam^2-1))/2, @@ -706,15 +706,14 @@ static int max_l_table[32+1] = // Split len uintD's below sourceptr into chunks of l bits, thus filling // N real numbers at x. -static void fill_factor (uintL N, fftr_real* x, uintL l, - const uintD* sourceptr, uintL len) +static void fill_factor (uintC N, fftr_real* x, uintL l, + const uintD* sourceptr, uintC len) { - var uintL i; + var uintC i; if (max_l(2) > intDsize && l > intDsize) { // l > intDsize if (max_l(2) > 64 && l > 64) { - fprint(cl_stderr, "FFT problem: l > 64 not supported by pow2_table\n"); - cl_abort(); + throw runtime_exception("FFT problem: l > 64 not supported by pow2_table"); } var fftr_real carry = 0; var sintL carrybits = 0; // number of bits in carry (>=0, N) - cl_abort(); + throw runtime_exception(); } else if (max_l(2) >= intDsize && l == intDsize) { // l = intDsize if (len > N) - cl_abort(); + throw runtime_exception(); for (i = 0; i < len; i++) { var uintD digit = lsprefnext(sourceptr); x[i] = (fftr_real)digit; @@ -768,14 +767,14 @@ static void fill_factor (uintL N, fftr_real* x, uintL l, } while (carrybits > 0) { if (!(i < N)) - cl_abort(); + throw runtime_exception(); x[i] = (fftr_real)(carry & l_mask); carry >>= l; carrybits -= l; i++; } if (len > 0) - cl_abort(); + throw runtime_exception(); } for ( ; i < N; i++) x[i] = (fftr_real)0; @@ -826,11 +825,11 @@ static inline fftr_real fftr_ffloor (fftr_real x) // The z[i] are known to be approximately integers >= 0, < N*2^(2*l). // Assumes room for floor(N*l/intDsize)+(1+ceiling((n+2*l)/intDsize)) uintD's // below destptr. Fills len digits and returns (destptr lspop len). -static uintD* unfill_product (uintL n, uintL N, // N = 2^n +static uintD* unfill_product (uintL n, uintC N, // N = 2^n const fftr_real * z, uintL l, uintD* destptr) { - var uintL i; + var uintC i; if (n + 2*l <= intDsize) { // 2-digit carry is sufficient, l < intDsize var uintD carry0 = 0; @@ -928,7 +927,7 @@ static uintD* unfill_product (uintL n, uintL N, // N = 2^n if (!(digit >= (fftr_real)0 && z[i] > digit - (fftr_real)0.5 && z[i] < digit + (fftr_real)0.5)) - cl_abort(); + throw runtime_exception(); #endif if (shift > 0) digit = digit * fftr_pow2_table[shift]; @@ -976,35 +975,34 @@ static inline void mulu_fftr_nocheck (const uintD* sourceptr1, uintC len1, // is 2*len1*intDsize/l_max - 1 <= 2^k. { var const int l = max_l(2); - var uintL lhs = 2*ceiling((uintL)len1*intDsize,l) - 1; // >=1 + var uintC lhs = 2*ceiling(len1*intDsize,l) - 1; // >=1 if (lhs < 3) k = 2; else - integerlength32(lhs-1, k=); // k>=2 + integerlengthC(lhs-1, k=); // k>=2 } // Try whether this k is ok or whether we have to increase k. for ( ; ; k++) { if (k >= sizeof(max_l_table)/sizeof(max_l_table[0]) || max_l_table[k] <= 0) { - fprint(cl_stderr, "FFT problem: numbers too big, floating point precision not sufficient\n"); - cl_abort(); + throw runtime_exception("FFT problem: numbers too big, floating point precision not sufficient"); } - if (2*ceiling((uintL)len1*intDsize,max_l_table[k])-1 <= ((uintL)1 << k)) + if (2*ceiling(len1*intDsize,max_l_table[k])-1 <= ((uintC)1 << k)) break; } // We could try to reduce l, keeping the same k. But why should we? // Calculate the number of pieces in which source2 will have to be // split. Each of the pieces must satisfy // ceiling(len1*intDsize/l) + ceiling(len2*intDsize/l) - 1 <= 2^k, - var uintL len2p; + var uintC len2p; // Try once with k, once with k+1. Compare them. { - var uintL remaining_k = ((uintL)1 << k) + 1 - ceiling((uintL)len1*intDsize,max_l_table[k]); - var uintL max_piecelen_k = floor(remaining_k*max_l_table[k],intDsize); - var uintL numpieces_k = ceiling(len2,max_piecelen_k); - var uintL remaining_k1 = ((uintL)1 << (k+1)) + 1 - ceiling((uintL)len1*intDsize,max_l_table[k+1]); - var uintL max_piecelen_k1 = floor(remaining_k1*max_l_table[k+1],intDsize); - var uintL numpieces_k1 = ceiling(len2,max_piecelen_k1); + var uintC remaining_k = ((uintC)1 << k) + 1 - ceiling(len1*intDsize,max_l_table[k]); + var uintC max_piecelen_k = floor(remaining_k*max_l_table[k],intDsize); + var uintC numpieces_k = ceiling(len2,max_piecelen_k); + var uintC remaining_k1 = ((uintC)1 << (k+1)) + 1 - ceiling(len1*intDsize,max_l_table[k+1]); + var uintC max_piecelen_k1 = floor(remaining_k1*max_l_table[k+1],intDsize); + var uintC numpieces_k1 = ceiling(len2,max_piecelen_k1); if (numpieces_k <= 2*numpieces_k1) { // keep k len2p = max_piecelen_k; @@ -1016,7 +1014,7 @@ static inline void mulu_fftr_nocheck (const uintD* sourceptr1, uintC len1, } var const uintL l = max_l_table[k]; var const uintL n = k; - var const uintL N = (uintL)1 << n; + var const uintC N = (uintC)1 << n; CL_ALLOCA_STACK; var fftr_real* const x = cl_alloc_array(fftr_real,N); var fftr_real* const y = cl_alloc_array(fftr_real,N); @@ -1026,9 +1024,9 @@ static inline void mulu_fftr_nocheck (const uintD* sourceptr1, uintC len1, var fftr_real* const z = x; // put z in place of x - saves memory #endif var uintD* const tmpprod1 = cl_alloc_array(uintD,len1+1); - var uintL tmpprod_len = floor(l< len2) @@ -1039,7 +1037,7 @@ static inline void mulu_fftr_nocheck (const uintD* sourceptr1, uintC len1, mulu_loop_lsp(lspref(sourceptr2,0),sourceptr1,tmpptr,len1); if (addto_loop_lsp(tmpptr,destptr,len1+1)) if (inc_loop_lsp(destptr lspop (len1+1),destlen-(len1+1))) - cl_abort(); + throw runtime_exception(); } else { var bool squaring = ((sourceptr1 == sourceptr2) && (len1 == len2p)); // Fill factor x. @@ -1057,31 +1055,31 @@ static inline void mulu_fftr_nocheck (const uintD* sourceptr1, uintC len1, { var fftr_real re_lo_limit = (fftr_real)(-0.5); var fftr_real re_hi_limit = (fftr_real)N * fftr_pow2_table[l] * fftr_pow2_table[l] + (fftr_real)0.5; - for (var uintL i = 0; i < N; i++) + for (var uintC i = 0; i < N; i++) if (!(z[i] > re_lo_limit && z[i] < re_hi_limit)) - cl_abort(); + throw runtime_exception(); } #endif var uintD* tmpLSDptr = arrayLSDptr(tmpprod,tmpprod_len); var uintD* tmpMSDptr = unfill_product(n,N,z,l,tmpLSDptr); - var uintL tmplen = + var uintC tmplen = #if CL_DS_BIG_ENDIAN_P tmpLSDptr - tmpMSDptr; #else tmpMSDptr - tmpLSDptr; #endif if (tmplen > tmpprod_len) - cl_abort(); + throw runtime_exception(); // Add result to destptr[-destlen..-1]: if (tmplen > destlen) { if (test_loop_msp(tmpMSDptr,tmplen-destlen)) - cl_abort(); + throw runtime_exception(); tmplen = destlen; } if (addto_loop_lsp(tmpLSDptr,destptr,tmplen)) if (inc_loop_lsp(destptr lspop tmplen,destlen-tmplen)) - cl_abort(); + throw runtime_exception(); } // Decrement len2. destptr = destptr lspop len2p; @@ -1136,7 +1134,6 @@ static void mulu_fftr (const uintD* sourceptr1, uintC len1, var uintD checksum = multiply_checksum(checksum1,checksum2); mulu_fftr_nocheck(sourceptr1,len1,sourceptr2,len2,destptr); if (!(checksum == compute_checksum(destptr,len1+len2))) { - fprint(cl_stderr, "FFT problem: checksum error\n"); - cl_abort(); + throw runtime_exception("FFT problem: checksum error."); } }