1 // Fast integer multiplication using FFT in a modular ring.
2 // Bruno Haible 5.5.1996, 30.6.1996
4 // FFT in the complex domain has the drawback that it needs careful round-off
5 // error analysis. So here we choose another field of characteristic 0: Q_p.
6 // Since Q_p contains exactly the (p-1)th roots of unity, we choose
7 // p == 1 mod N and have the Nth roots of unity (N = 2^n) in Q_p and
8 // even in Z_p. Actually, we compute in Z/(p^m Z).
10 // All operations the FFT algorithm needs is addition, subtraction,
11 // multiplication, multiplication by the Nth root of unity and division
12 // by N. Hence we can use the domain Z/(p^m Z) even if p is not a prime!
14 // We want to compute the convolution of N 32-bit words. The resulting
15 // words are < (2^32)^2 * N. To avoid computing with numbers greater than
16 // 32 bits, we compute in Z/pZ for three different primes p in parallel,
17 // i.e. we compute in the ring (Z / p1 Z) x (Z / p2 Z) x (Z / p3 Z). We choose
18 // p1 = 3*2^30+1, p2 = 13*2^28+1, p3 = 29*2^27+1 (or 15*2^27+1 or 17*2^27+1).
19 // Because of p1*p2*p3 >= (2^32)^2 * N, the chinese remainder theorem will
20 // faithfully combine 3 32-bit words to a word < (2^32)^2 * N.
23 #error "fft mod p implemented only for intDsize==32"
26 static const uint32 p1 = 1+(3<<30); // = 3221225473
27 static const uint32 p2 = 1+(13<<28); // = 3489660929
28 static const uint32 p3 = 1+(29<<27); // = 3892314113
31 uint32 w1; // remainder mod p1
32 uint32 w2; // remainder mod p2
33 uint32 w3; // remainder mod p3
36 static const fftp3_word fftp3_roots_of_1 [27+1] =
37 // roots_of_1[n] is a (2^n)th root of unity in our ring.
38 // (Also roots_of_1[n-1] = roots_of_1[n]^2, but we don't need this.)
41 { 3221225472, 3489660928, 3892314112 },
42 { 1013946479, 1647819299, 800380159 },
43 { 1031213943, 1728043888, 1502037594 },
44 { 694614138, 156262243, 1093602721 },
45 { 347220834, 408915340, 491290336 },
46 { 680684264, 452506952, 846570852 },
47 { 1109768284, 1230864251, 390870396 },
48 { 602134989, 11914870, 1791906422 },
49 { 1080308101, 336213294, 158126993 },
50 { 381653707, 1548648704, 1432108380 },
51 { 902453688, 429650884, 798472051 },
52 { 1559299664, 775532293, 1877725713 },
53 { 254499731, 727160889, 1192318337 },
54 { 1376063215, 1557302953, 1642774092 },
55 { 1284040478, 937059094, 1876917422 },
56 { 336664489, 1411926644, 682311165 },
57 { 894491787, 534027329, 473693773 },
58 { 795860341, 1178663675, 1313928891 },
59 { 23880336, 1707047452, 93147496 },
60 { 790585193, 892284267, 1647947905 },
61 { 877386874, 1729337527, 1233672227 },
62 { 1510644826, 333000282, 296593948 },
63 { 353060343, 901807544, 659274384 },
64 { 716717815, 1281544649, 1457308949 },
65 { 1020271667, 1713714919, 627726344 },
66 { 139914905, 950720020, 1119863241 },
67 { 709308748, 675675166, 538726428 }
70 // Define this for (cheap) consistency checks.
73 // Define this for extensive consistency checks.
74 //#define DEBUG_FFTP3_OPERATIONS
76 // Define the algorithm of the backward FFT:
77 // Either FORWARD (a normal FFT followed by a permutation)
78 // or RECIPROOT (an FFT with reciprocal root of unity)
79 // or CLEVER (an FFT with reciprocal root of unity but clever computation
80 // of the reciprocals).
81 // Drawback of FORWARD: the permutation pass.
82 // Drawback of RECIPROOT: need all the powers of the root, not only half of them.
86 #define FFTP3_BACKWARD CLEVER
88 #ifdef DEBUG_FFTP3_OPERATIONS
89 #define check_fftp3_word(x) if ((x.w1 >= p1) || (x.w2 >= p2) || (x.w3 >= p3)) throw runtime_exception()
91 #define check_fftp3_word(x)
95 static inline void zerop3 (fftp3_word& r)
103 static inline void setp3 (uint32 x, fftp3_word& r)
105 if (p1 >= ((uint32)1 << 31))
106 r.w1 = (x >= p1 ? x - p1 : x);
108 divu_3232_3232(x,p1, ,r.w1=);
109 if (p2 >= ((uint32)1 << 31))
110 r.w2 = (x >= p2 ? x - p2 : x);
112 divu_3232_3232(x,p2, ,r.w2=);
113 if (p3 >= ((uint32)1 << 31))
114 r.w3 = (x >= p3 ? x - p3 : x);
116 divu_3232_3232(x,p3, ,r.w3=);
119 // Chinese remainder theorem:
120 // (Z / p1 Z) x (Z / p2 Z) x (Z / p3 Z) == Z / p1*p2*p3 Z = Z / P Z.
121 // Return r as an integer >= 0, < p1*p2*p3, as 3-digit-sequence res.
122 static void combinep3 (const fftp3_word& r, uintD* resLSDptr)
125 // Compute e1 * r.w1 + e2 * r.w2 + e3 * r.w3 where the idempotents are
126 // found as: xgcd(pi,p/pi) = 1 = ui*pi + vi*P/pi, ei = 1 - ui*pi.
127 // e1 = 35002755423056150739595925972
128 // e2 = 14584479687667766215746868453
129 // e3 = 37919651490985126265126719818
130 // Since e1+e2+e3 > 2*P, we prefer to compute with the negated
131 // idempotents, their sum is < P:
132 // -e1 = 8750687877798370870638831149
133 // -e2 = 29168963613186755394487888668
134 // -e3 = 5833791809869395345108037303
135 // We will have 0 <= -e1 * r.w1 + -e2 * r.w2 + -e3 * r.w3 <
136 // < -e1 * p1 + -e2 * p2 + -e3 * p3 < 2^32 * p1*p2*p3 < 2^128.
137 // The sum of the products fits in 4 digits, we divide by p1*p2*p3
138 // as a 3-digit sequence and finally negate the remainder.
139 #if CL_DS_BIG_ENDIAN_P
140 var const uintD p123 [3] = { 0x8D600002, 0x06800002, 0x78000001 };
141 var const uintD e1 [3] = { 0x1C46663C, 0x647FFF9D, 0x7E66662D };
142 var const uintD e2 [3] = { 0x5E40004D, 0xEDAAAB66, 0xEAAAAB1C };
143 var const uintD e3 [3] = { 0x12D99977, 0xB45554FE, 0x0EEEEEB7 };
145 var const uintD p123 [3] = { 0x78000001, 0x06800002, 0x8D600002 };
146 var const uintD e1 [3] = { 0x7E66662D, 0x647FFF9D, 0x1C46663C };
147 var const uintD e2 [3] = { 0xEAAAAB1C, 0xEDAAAB66, 0x5E40004D };
148 var const uintD e3 [3] = { 0x0EEEEEB7, 0xB45554FE, 0x12D99977 };
151 var uintD* const sumLSDptr = arrayLSDptr(sum,4);
152 mulu_loop_lsp(r.w1,arrayLSDptr(e1,3), sumLSDptr,3);
153 lspref(sumLSDptr,3) += muluadd_loop_lsp(r.w2,arrayLSDptr(e2,3), sumLSDptr,3);
154 lspref(sumLSDptr,3) += muluadd_loop_lsp(r.w3,arrayLSDptr(e3,3), sumLSDptr,3);
159 UDS_divide(arrayMSDptr(sum,4),4,arrayLSDptr(sum,4),
160 arrayMSDptr(p123,3),3,arrayLSDptr(p123,3),
165 copy_loop_lsp(r.LSDptr,arrayLSDptr(sum,4),r.len);
166 DS_clear_loop(arrayMSDptr(sum,4) mspop 1,3-r.len,arrayLSDptr(sum,4) lspop r.len);
169 // Division wie UDS_divide mit a_len=4, b_len=3.
174 divuD(highlowDD(lspref(sumLSDptr,3),lspref(sumLSDptr,2)),lspref(arrayLSDptr(p123,3),2), q_stern=,c1=);
175 { var uintDD c2 = highlowDD(c1,lspref(sumLSDptr,1));
176 var uintDD c3 = muluD(lspref(arrayLSDptr(p123,3),1),q_stern);
178 { q_stern = q_stern-1;
179 if (c3-c2 > highlowDD(lspref(arrayLSDptr(p123,3),2),lspref(arrayLSDptr(p123,3),1)))
180 { q_stern = q_stern-1; }
183 divuD(lspref(sumLSDptr,3),lspref(sumLSDptr,2),lspref(arrayLSDptr(p123,3),2), q_stern=,c1=);
184 { var uintD c2lo = lspref(sumLSDptr,1);
187 muluD(lspref(arrayLSDptr(p123,3),1),q_stern, c3hi=,c3lo=);
188 if ((c3hi > c1) || ((c3hi == c1) && (c3lo > c2lo)))
189 { q_stern = q_stern-1;
190 c3hi -= c1; if (c3lo < c2lo) { c3hi--; }; c3lo -= c2lo;
191 if ((c3hi > lspref(arrayLSDptr(p123,3),2)) || ((c3hi == lspref(arrayLSDptr(p123,3),2)) && (c3lo > lspref(arrayLSDptr(p123,3),1))))
192 { q_stern = q_stern-1; }
196 { var uintD carry = mulusub_loop_lsp(q_stern,arrayLSDptr(p123,3),sumLSDptr,3);
197 if (carry > lspref(sumLSDptr,3))
198 { q_stern = q_stern-1;
199 addto_loop_lsp(arrayLSDptr(p123,3),sumLSDptr,3);
203 if (lspref(sumLSDptr,0)==0 && lspref(sumLSDptr,1)==0 && lspref(sumLSDptr,2)==0) {
204 clear_loop_lsp(resLSDptr,3);
206 sub_loop_lsp(arrayLSDptr(p123,3),sumLSDptr,resLSDptr,3);
210 // r := (a + b) mod p
211 static inline void addp3 (const fftp3_word& a, const fftp3_word& b, fftp3_word& r)
215 check_fftp3_word(a); check_fftp3_word(b);
216 // Add single 32-bit words mod pi.
217 if (((x = (a.w1 + b.w1)) < b.w1) || (x >= p1))
220 if (((x = (a.w2 + b.w2)) < b.w2) || (x >= p2))
223 if (((x = (a.w3 + b.w3)) < b.w3) || (x >= p3))
229 // r := (a - b) mod p
230 static inline void subp3 (const fftp3_word& a, const fftp3_word& b, fftp3_word& r)
232 check_fftp3_word(a); check_fftp3_word(b);
233 // Subtract single 32-bit words mod pi.
234 r.w1 = (a.w1 < b.w1 ? a.w1-b.w1+p1 : a.w1-b.w1);
235 r.w2 = (a.w2 < b.w2 ? a.w2-b.w2+p2 : a.w2-b.w2);
236 r.w3 = (a.w3 < b.w3 ? a.w3-b.w3+p3 : a.w3-b.w3);
240 // r := (a * b) mod p
241 static void mulp3 (const fftp3_word& a, const fftp3_word& b, fftp3_word& res)
243 check_fftp3_word(a); check_fftp3_word(b);
244 // To divide c (0 <= c < p^2) by p = m*2^n+1,
245 // we set q := floor(floor(c/2^n)/m) and
246 // r := c - q*p = (floor(c/2^n) mod m)*2^n + (c mod 2^n) - q.
247 // If this becomes negative, set r := r + p (at most twice).
248 // (This works because floor(c/p) <= q <= floor(c/p)+2.)
249 // (Actually, here, 0 <= c <= (p-1)^2, hence
250 // floor(c/p) <= q <= floor(c/p)+1, so we have
251 // to set r := r + p at most once!)
253 #define mul_mod_p(aw,bw,result_zuweisung,p,n,m) \
257 mulu32(aw,bw, hi=,lo=); \
258 divu_6432_3232(hi,lo,p, ,result_zuweisung); \
261 #define mul_mod_p(aw,bw,result_zuweisung,p,n,m) \
265 mulu32(aw,bw, hi=,lo=); \
268 divu_6432_3232(hi>>n,(hi<<(32-n))|(lo>>n), m, q=,r=); \
269 r = (r << n) | (lo & (((uint32)1<<n)-1)); \
270 if (r >= q) { r = r-q; } else { r = r-q+p; } \
271 result_zuweisung r; \
274 // p1 = 3*2^30+1, n = 30, m = 3
275 mul_mod_p(a.w1,b.w1,res.w1=,p1,30,3);
276 // p2 = 13*2^28+1, n = 28, m = 13
277 mul_mod_p(a.w2,b.w2,res.w2=,p2,28,13);
278 // p3 = 29*2^27+1, n = 27, m = 29
279 mul_mod_p(a.w3,b.w3,res.w3=,p3,27,29);
281 check_fftp3_word(res);
283 #ifdef DEBUG_FFTP3_OPERATIONS
284 static void mulp3_doublecheck (const fftp3_word& a, const fftp3_word& b, fftp3_word& r)
286 fftp3_word zero, ma, mb, or;
292 if (!((r.w1 == or.w1) && (r.w2 == or.w2) && (r.w3 == or.w3)))
293 throw runtime_exception();
295 #define mulp3 mulp3_doublecheck
296 #endif /* DEBUG_FFTP3_OPERATIONS */
298 // b := (a / 2) mod p
299 static inline void shiftp3 (const fftp3_word& a, fftp3_word& b)
302 b.w1 = (a.w1 & 1 ? (a.w1 >> 1) + (p1 >> 1) + 1 : (a.w1 >> 1));
303 b.w2 = (a.w2 & 1 ? (a.w2 >> 1) + (p2 >> 1) + 1 : (a.w2 >> 1));
304 b.w3 = (a.w3 & 1 ? (a.w3 >> 1) + (p3 >> 1) + 1 : (a.w3 >> 1));
310 // Reverse an n-bit number x. n>0.
311 static uintC bit_reverse (uintL n, uintC x)
318 } while (!(--n == 0));
323 // Compute an convolution mod p using FFT: z[0..N-1] := x[0..N-1] * y[0..N-1].
324 static void fftp3_convolution (const uintL n, const uintC N, // N = 2^n
325 fftp3_word * x, // N words
326 fftp3_word * y, // N words
327 fftp3_word * z // N words result
331 #if (FFTP3_BACKWARD == RECIPROOT) || defined(DEBUG_FFTP3)
332 var fftp3_word* const w = cl_alloc_array(fftp3_word,N);
334 var fftp3_word* const w = cl_alloc_array(fftp3_word,(N>>1)+1);
337 // Initialize w[i] to w^i, w a primitive N-th root of unity.
338 w[0] = fftp3_roots_of_1[0];
339 w[1] = fftp3_roots_of_1[n];
340 #if (FFTP3_BACKWARD == RECIPROOT) || defined(DEBUG_FFTP3)
341 for (i = 2; i < N; i++)
342 mulp3(w[i-1],fftp3_roots_of_1[n], w[i]);
343 #else // need only half of the roots
344 for (i = 2; i < N>>1; i++)
345 mulp3(w[i-1],fftp3_roots_of_1[n], w[i]);
348 // Check that w is really a primitive N-th root of unity.
351 mulp3(w[N-1],fftp3_roots_of_1[n], w_N);
352 if (!(w_N.w1 == 1 && w_N.w2 == 1 && w_N.w3 == 1))
353 throw runtime_exception();
355 if (!(w_N.w1 == p1-1 && w_N.w2 == p2-1 && w_N.w3 == p3-1))
356 throw runtime_exception();
359 var bool squaring = (x == y);
360 // Do an FFT of length N on x.
364 var const uintC tmax = N>>1; // tmax = 2^(n-1)
365 for (var uintC t = 0; t < tmax; t++) {
367 var uintC i2 = i1 + tmax;
368 // Butterfly: replace (x(i1),x(i2)) by
369 // (x(i1) + x(i2), x(i1) - x(i2)).
372 subp3(x[i1],tmp, x[i2]);
373 addp3(x[i1],tmp, x[i1]);
376 for (l = n-2; l>=0; l--) {
377 var const uintC smax = (uintC)1 << (n-1-l);
378 var const uintC tmax = (uintC)1 << l;
379 for (var uintC s = 0; s < smax; s++) {
380 var uintC exp = bit_reverse(n-1-l,s) << l;
381 for (var uintC t = 0; t < tmax; t++) {
382 var uintC i1 = (s << (l+1)) + t;
383 var uintC i2 = i1 + tmax;
384 // Butterfly: replace (x(i1),x(i2)) by
385 // (x(i1) + w^exp*x(i2), x(i1) - w^exp*x(i2)).
387 mulp3(x[i2],w[exp], tmp);
388 subp3(x[i1],tmp, x[i2]);
389 addp3(x[i1],tmp, x[i1]);
394 // Do an FFT of length N on y.
398 var uintC const tmax = N>>1; // tmax = 2^(n-1)
399 for (var uintC t = 0; t < tmax; t++) {
401 var uintC i2 = i1 + tmax;
402 // Butterfly: replace (y(i1),y(i2)) by
403 // (y(i1) + y(i2), y(i1) - y(i2)).
406 subp3(y[i1],tmp, y[i2]);
407 addp3(y[i1],tmp, y[i1]);
410 for (l = n-2; l>=0; l--) {
411 var const uintC smax = (uintC)1 << (n-1-l);
412 var const uintC tmax = (uintC)1 << l;
413 for (var uintC s = 0; s < smax; s++) {
414 var uintC exp = bit_reverse(n-1-l,s) << l;
415 for (var uintC t = 0; t < tmax; t++) {
416 var uintC i1 = (s << (l+1)) + t;
417 var uintC i2 = i1 + tmax;
418 // Butterfly: replace (y(i1),y(i2)) by
419 // (y(i1) + w^exp*y(i2), y(i1) - w^exp*y(i2)).
421 mulp3(y[i2],w[exp], tmp);
422 subp3(y[i1],tmp, y[i2]);
423 addp3(y[i1],tmp, y[i1]);
428 // Multiply the transformed vectors into z.
429 for (i = 0; i < N; i++)
430 mulp3(x[i],y[i], z[i]);
431 // Undo an FFT of length N on z.
434 for (l = 0; l < n-1; l++) {
435 var const uintC smax = (uintC)1 << (n-1-l);
436 var const uintC tmax = (uintC)1 << l;
437 #if FFTP3_BACKWARD != CLEVER
438 for (var uintC s = 0; s < smax; s++) {
439 var uintC exp = bit_reverse(n-1-l,s) << l;
440 #if FFTP3_BACKWARD == RECIPROOT
442 exp = N - exp; // negate exp (use w^-1 instead of w)
444 for (var uintC t = 0; t < tmax; t++) {
445 var uintC i1 = (s << (l+1)) + t;
446 var uintC i2 = i1 + tmax;
447 // Inverse Butterfly: replace (z(i1),z(i2)) by
448 // ((z(i1)+z(i2))/2, (z(i1)-z(i2))/(2*w^exp)).
451 addp3(z[i1],z[i2], sum);
452 subp3(z[i1],z[i2], diff);
454 mulp3(diff,w[exp], diff); shiftp3(diff, z[i2]);
457 #else // FFTP3_BACKWARD == CLEVER: clever handling of negative exponents
458 /* s = 0, exp = 0 */ {
459 for (var uintC t = 0; t < tmax; t++) {
461 var uintC i2 = i1 + tmax;
462 // Inverse Butterfly: replace (z(i1),z(i2)) by
463 // ((z(i1)+z(i2))/2, (z(i1)-z(i2))/(2*w^exp)),
467 addp3(z[i1],z[i2], sum);
468 subp3(z[i1],z[i2], diff);
470 shiftp3(diff, z[i2]);
473 for (var uintC s = 1; s < smax; s++) {
474 var uintC exp = bit_reverse(n-1-l,s) << l;
475 exp = (N>>1) - exp; // negate exp (use w^-1 instead of w)
476 for (var uintC t = 0; t < tmax; t++) {
477 var uintC i1 = (s << (l+1)) + t;
478 var uintC i2 = i1 + tmax;
479 // Inverse Butterfly: replace (z(i1),z(i2)) by
480 // ((z(i1)+z(i2))/2, (z(i1)-z(i2))/(2*w^exp)),
481 // with exp <-- (N/2 - exp).
484 addp3(z[i1],z[i2], sum);
485 subp3(z[i2],z[i1], diff); // note that w^(N/2) = -1
487 mulp3(diff,w[exp], diff); shiftp3(diff, z[i2]);
493 var const uintC tmax = N>>1; // tmax = 2^(n-1)
494 for (var uintC t = 0; t < tmax; t++) {
496 var uintC i2 = i1 + tmax;
497 // Inverse Butterfly: replace (z(i1),z(i2)) by
498 // ((z(i1)+z(i2))/2, (z(i1)-z(i2))/2).
501 addp3(z[i1],z[i2], sum);
502 subp3(z[i1],z[i2], diff);
504 shiftp3(diff, z[i2]);
508 #if FFTP3_BACKWARD == FORWARD
509 // Swap z[i] and z[N-i] for 0 < i < N/2.
510 for (i = (N>>1)-1; i > 0; i--) {
511 var fftp3_word tmp = z[i];
518 static void mulu_fft_modp3 (const uintD* sourceptr1, uintC len1,
519 const uintD* sourceptr2, uintC len2,
521 // Es ist 2 <= len1 <= len2.
524 // source1 ist ein Stück der Länge N1, source2 ein oder mehrere Stücke
525 // der Länge N2, mit N1+N2 <= N, wobei N Zweierpotenz ist.
526 // sum(i=0..N-1, x_i b^i) * sum(i=0..N-1, y_i b^i) wird errechnet,
527 // indem man die beiden Polynome
528 // sum(i=0..N-1, x_i T^i), sum(i=0..N-1, y_i T^i)
529 // multipliziert, und zwar durch Fourier-Transformation (s.o.).
531 integerlengthC(len1-1, n=); // 2^(n-1) < len1 <= 2^n
532 var uintC len = (uintC)1 << n; // kleinste Zweierpotenz >= len1
533 // Wählt man N = len, so hat man ceiling(len2/(len-len1+1)) * FFT(len).
534 // Wählt man N = 2*len, so hat man ceiling(len2/(2*len-len1+1)) * FFT(2*len).
535 // Wir wählen das billigere von beiden:
536 // Bei ceiling(len2/(len-len1+1)) <= 2 * ceiling(len2/(2*len-len1+1))
537 // nimmt man N = len, bei ....... > ........ dagegen N = 2*len.
538 // (Wahl von N = 4*len oder mehr bringt nur in Extremfällen etwas.)
539 if (len2 > 2 * (len-len1+1) * (len2 <= (2*len-len1+1) ? 1 : ceiling(len2,(2*len-len1+1)))) {
543 var const uintC N = len; // N = 2^n
545 var fftp3_word* const x = cl_alloc_array(fftp3_word,N);
546 var fftp3_word* const y = cl_alloc_array(fftp3_word,N);
548 var fftp3_word* const z = cl_alloc_array(fftp3_word,N);
550 var fftp3_word* const z = x; // put z in place of x - saves memory
552 var uintD* const tmpprod = cl_alloc_array(uintD,len1+1);
554 var uintC destlen = len1+len2;
555 clear_loop_lsp(destptr,destlen);
557 var uintC len2p; // length of a piece of source2
558 len2p = N - len1 + 1;
561 // len2p = min(N-len1+1,len2).
564 var uintD* tmpptr = arrayLSDptr(tmpprod,len1+1);
565 mulu_loop_lsp(lspref(sourceptr2,0),sourceptr1,tmpptr,len1);
566 if (addto_loop_lsp(tmpptr,destptr,len1+1))
567 if (inc_loop_lsp(destptr lspop (len1+1),destlen-(len1+1)))
568 throw runtime_exception();
570 var uintC destlenp = len1 + len2p - 1;
571 // destlenp = min(N,destlen-1).
572 var bool squaring = ((sourceptr1 == sourceptr2) && (len1 == len2p));
575 for (i = 0; i < len1; i++)
576 setp3(lspref(sourceptr1,i), x[i]);
577 for (i = len1; i < N; i++)
582 for (i = 0; i < len2p; i++)
583 setp3(lspref(sourceptr2,i), y[i]);
584 for (i = len2p; i < N; i++)
589 fftp3_convolution(n,N, &x[0], &y[0], &z[0]);
591 fftp3_convolution(n,N, &x[0], &x[0], &z[0]);
592 // Add result to destptr[-destlen..-1]:
594 var uintD* ptr = destptr;
595 // ac2|ac1|ac0 are an accumulator.
600 for (i = 0; i < destlenp; i++) {
601 // Convert z[i] to a 3-digit number.
603 combinep3(z[i],arrayLSDptr(z_i,3));
605 if (!(arrayLSref(z_i,3,2) < N))
606 throw runtime_exception();
608 // Add z[i] to the accumulator.
609 tmp = arrayLSref(z_i,3,0);
610 if ((ac0 += tmp) < tmp) {
614 tmp = arrayLSref(z_i,3,1);
615 if ((ac1 += tmp) < tmp)
617 tmp = arrayLSref(z_i,3,2);
619 // Add the accumulator's least significant word to destptr:
621 if ((ac0 += tmp) < tmp) {
633 if (!((i += 2) <= destlen))
634 throw runtime_exception();
636 if ((ac0 += tmp) < tmp)
645 if (inc_loop_lsp(ptr,destlen-i))
646 throw runtime_exception();
647 } else if (ac0 > 0) {
648 if (!((i += 1) <= destlen))
649 throw runtime_exception();
655 if (inc_loop_lsp(ptr,destlen-i))
656 throw runtime_exception();
660 // If destlenp < N, check that the remaining z[i] are 0.
661 for (i = destlenp; i < N; i++)
662 if (z[i].w1 > 0 || z[i].w2 > 0 || z[i].w3 > 0)
663 throw runtime_exception();
667 destptr = destptr lspop len2p;
669 sourceptr2 = sourceptr2 lspop len2p;