src/base/digitseq/cl_2DS_div.cc

   1 // div2adic().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_2DS.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_2D.h"
  13 #include "cl_DS.h"
  14 #include "cl_abort.h"
  15
  16 // Timings on a i486 33 MHz running Linux:
  17 // Divide N digits by N digits          Divide 2*N digits by N digits
  18 //     N   standard  Newton                 N   standard  Newton
  19 //     10    0.00015 0.00054                10    0.00023 0.00054
  20 //     25    0.00065 0.00256                25    0.00116 0.00256
  21 //     50    0.0024  0.0083                 50    0.0044  0.0082
  22 //    100    0.0089  0.027                 100    0.0172  0.027
  23 //    250    0.054   0.130                 250    0.107   0.130
  24 //    500    0.22    0.42                  500    0.425   0.42
  25 //   1000    0.86    1.30                 1000    1.72    1.30
  26 //   2500    5.6     4.1                  2500   11.0     4.1
  27 //   5000   22.3     9.4                  5000   44.7     9.3
  28 //  10000   91.2    20.6                 10000  182      20.5
  29 //  -----> Newton faster for N >= 2070   -----> Newton faster for N >= 500
  30 //
  31 // 1.0*N / N : Newton for N >= 2070 or 1790 >= N >= 1460
  32 // 1.1*N / N : Newton for N >= 1880 or 1790 >= N >= 1320
  33 // 1.2*N / N : Newton for N >= 1250
  34 // 1.3*N / N : Newton for N >= 1010
  35 // 1.4*N / N : Newton for N >=  940
  36 // 1.5*N / N : Newton for N >=  750
  37 // 1.6*N / N : Newton for N >=  625
  38 // 1.7*N / N : Newton for N >=  550
  39 // 1.8*N / N : Newton for N >=  500
  40 // 1.9*N / N : Newton for N >=  500
  41 // 2.0*N / N : Newton for N >=  500
  42 //
  43 // Break-even-point. When in doubt, prefer to choose the standard algorithm.
  44   static inline cl_boolean cl_recip_suitable (uintL m, uintL n) // n <= m
  45     { if (n < 500)
  46         return cl_false;
  47       else // when n >= 2100/(m/n)^2, i.e. (m/46)^2 > n
  48         { var uintL mq = floor(m,46);
  49           if ((mq >= bit(16)) || ((uintL)(mq*mq) > n))
  50             return cl_true;
  51           else
  52             return cl_false;
  53         }
  54     }
  55
  56 void div2adic (uintC a_len, const uintD* a_LSDptr, uintC b_len, const uintD* b_LSDptr, uintD* dest_LSDptr)
  57 {
  58   var uintC lendiff = a_len - b_len;
  59   if (cl_recip_suitable(a_len,b_len))
  60     { // Division using reciprocal (Newton-Hensel algorithm).
  61       CL_ALLOCA_STACK;
  62       // Bestimme Kehrwert c von b mod 2^(intDsize*b_len).
  63       var uintD* c_LSDptr;
  64       num_stack_alloc(b_len,,c_LSDptr=);
  65       recip2adic(b_len,b_LSDptr,c_LSDptr);
  66       // Bestimme q := a * c mod 2^(intDsize*b_len).
  67       var uintD* q_LSDptr;
  68       num_stack_alloc(2*b_len,,q_LSDptr=);
  69       cl_UDS_mul(a_LSDptr,b_len,c_LSDptr,b_len,q_LSDptr);
  70       // Zur Bestimmung des Restes wieder mit b multiplizieren:
  71       var uintD* p_LSDptr;
  72       num_stack_alloc(2*b_len,,p_LSDptr=);
  73       cl_UDS_mul(q_LSDptr,b_len,b_LSDptr,b_len,p_LSDptr);
  74       // Überprüfen, daß p == a mod 2^(intDsize*b_len):
  75       if (compare_loop_msp(a_LSDptr lspop b_len,p_LSDptr lspop b_len,b_len))
  76         cl_abort();
  77       // Quotient q und "Rest" (a-b*q)/2^(intDsize*b_len) ablegen:
  78       copy_loop_lsp(q_LSDptr,dest_LSDptr,b_len);
  79       if (lendiff <= b_len)
  80         { sub_loop_lsp(a_LSDptr lspop b_len,p_LSDptr lspop b_len,dest_LSDptr lspop b_len,lendiff); }
  81         else
  82         { var uintD carry = sub_loop_lsp(a_LSDptr lspop b_len,p_LSDptr lspop b_len,dest_LSDptr lspop b_len,b_len);
  83           copy_loop_lsp(a_LSDptr lspop 2*b_len,dest_LSDptr lspop 2*b_len,lendiff-b_len);
  84           if (carry) { dec_loop_lsp(dest_LSDptr lspop 2*b_len,lendiff-b_len); }
  85         }
  86     }
  87     else
  88     { // Standard division.
  89       var uintD b0inv = div2adic(1,lspref(b_LSDptr,0)); // b'
  90       copy_loop_lsp(a_LSDptr,dest_LSDptr,a_len); // d := a
  91       do { var uintD digit = lspref(dest_LSDptr,0); // nächstes d[j]
  92            digit = mul2adic(b0inv,digit);
  93            // digit = nächstes c[j]
  94            if (a_len <= b_len)
  95              { mulusub_loop_lsp(digit,b_LSDptr,dest_LSDptr,a_len); } // d := d - b * c[j] * beta^j
  96              else
  97              // a_len > b_len, b wird als durch Nullen fortgesetzt gedacht.
  98              { var uintD carry = mulusub_loop_lsp(digit,b_LSDptr,dest_LSDptr,b_len);
  99                if (lspref(dest_LSDptr,b_len) >= carry)
 100                  { lspref(dest_LSDptr,b_len) -= carry; }
 101                else
 102                  { lspref(dest_LSDptr,b_len) -= carry;
 103                    dec_loop_lsp(dest_LSDptr lspop (b_len+1),a_len-(b_len+1));
 104              }   }
 105            // Nun ist lspref(dest_LSDptr,0) = 0.
 106            lspref(dest_LSDptr,0) = digit; // c[j] ablegen
 107            lsshrink(dest_LSDptr); a_len--; // nächstes j
 108          }
 109          until (a_len==lendiff);
 110     }
 111 }
 112 // Bit complexity (N = max(a_len,b_len)): O(M(N)).
 113