// recip2adic(). // General includes. #include "cl_sysdep.h" // Specification. #include "cl_2DS.h" // Implementation. #include "cl_2D.h" #include "cl_DS.h" // Break-even point of the Newton iteration vs. standard div2adic. const unsigned int recip2adic_threshold = 380; void recip2adic (uintC len, const uintD* a_LSDptr, uintD* dest_LSDptr) { // Method: // If len < threshold, use regular 2-adic division. // Else [Newton iteration] set n := ceiling(len/2), // compute recursively b := recip2adic(a mod 2^(intDsize*n)), // return 2*b-a*b^2 mod 2^(intDsize*2*n). CL_ALLOCA_STACK; var uintL k = 0; // number of Newton steps var uintL n = len; while (n >= recip2adic_threshold) { n = ceiling(n,2); k++; } // Nonrecursive step. var uintD* one_LSDptr; num_stack_alloc(n,,one_LSDptr=); lspref(one_LSDptr,0) = 1; clear_loop_lsp(one_LSDptr lspop 1,n-1); div2adic(n,one_LSDptr,a_LSDptr,dest_LSDptr); // Newton iteration. if (k > 0) { var uintD* b2_LSDptr; var uintD* prod_LSDptr; num_stack_alloc(len+1,,b2_LSDptr=); num_stack_alloc(2*(uintL)len,,prod_LSDptr=); do { // n = ceiling(len/2^k) // Compute n2 = ceiling(len/2^(k-1)), // then n = ceiling(n2/2). k--; var uintL n2 = ((len-1)>>k)+1; // = 2*n or = 2*n-1 // Set b := 2*b-a*b^2 mod 2^(intDsize*n2) cl_UDS_mul_square(dest_LSDptr,n,b2_LSDptr); // b^2 cl_UDS_mul(b2_LSDptr,n2,a_LSDptr,n2,prod_LSDptr); // a*b^2 clear_loop_lsp(dest_LSDptr lspop n,n2-n); shift1left_loop_lsp(dest_LSDptr,n+1); // (n+1 instead of n2 is ok) subfrom_loop_lsp(prod_LSDptr,dest_LSDptr,n2); n = n2; } while (k > 0); } } // Bit complexity (N := len): O(M(N)).