src/base/digitseq/cl_2DS_recip.cc

   1 // recip2adic().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cl_2DS.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_2D.h"
  13 #include "cl_DS.h"
  14
  15 // Break-even point of the Newton iteration vs. standard div2adic.
  16 #if CL_USE_GMP
  17 const unsigned int recip2adic_threshold = 620;
  18 #else
  19 // Use the old default values from CLN version <= 1.0.3 as a crude estimate.
  20 const unsigned int recip2adic_threshold = 380;
  21 #endif
  22
  23 void recip2adic (uintC len, const uintD* a_LSDptr, uintD* dest_LSDptr)
  24 {
  25         // Method:
  26         // If len < threshold, use regular 2-adic division.
  27         // Else [Newton iteration] set n := ceiling(len/2),
  28         //   compute recursively b := recip2adic(a mod 2^(intDsize*n)),
  29         //   return 2*b-a*b^2 mod 2^(intDsize*2*n).
  30         CL_ALLOCA_STACK;
  31         var uintL k = 0; // number of Newton steps
  32         var uintL n = len;
  33         while (n >= recip2adic_threshold) {
  34                 n = ceiling(n,2);
  35                 k++;
  36         }
  37         // Nonrecursive step.
  38         var uintD* one_LSDptr;
  39         num_stack_alloc(n,,one_LSDptr=);
  40         lspref(one_LSDptr,0) = 1;
  41         clear_loop_lsp(one_LSDptr lspop 1,n-1);
  42         div2adic(n,one_LSDptr,a_LSDptr,dest_LSDptr);
  43         // Newton iteration.
  44         if (k > 0) {
  45                 var uintD* b2_LSDptr;
  46                 var uintD* prod_LSDptr;
  47                 num_stack_alloc(len+1,,b2_LSDptr=);
  48                 num_stack_alloc(2*(uintL)len,,prod_LSDptr=);
  49                 do {
  50                         // n = ceiling(len/2^k)
  51                         // Compute n2 = ceiling(len/2^(k-1)),
  52                         // then n = ceiling(n2/2).
  53                         k--;
  54                         var uintL n2 = ((len-1)>>k)+1; // = 2*n or = 2*n-1
  55                         // Set b := 2*b-a*b^2 mod 2^(intDsize*n2)
  56                         cl_UDS_mul_square(dest_LSDptr,n,b2_LSDptr); // b^2
  57                         cl_UDS_mul(b2_LSDptr,n2,a_LSDptr,n2,prod_LSDptr); // a*b^2
  58                         clear_loop_lsp(dest_LSDptr lspop n,n2-n);
  59                         shift1left_loop_lsp(dest_LSDptr,n+1); // (n+1 instead of n2 is ok)
  60                         subfrom_loop_lsp(prod_LSDptr,dest_LSDptr,n2);
  61                         n = n2;
  62                 } while (k > 0);
  63         }
  64 }
  65 // Bit complexity (N := len): O(M(N)).
  66