numtheory/cl_nt_sqrtmodp.cc

   1 // sqrt_mod_p().
   2
   3 // General includes.
   4 #include "cl_sysdep.h"
   5
   6 // Specification.
   7 #include "cln/numtheory.h"
   8
   9
  10 // Implementation.
  11
  12 #include "cl_I.h"
  13 #include "cln/abort.h"
  14
  15 #undef floor
  16 #include <cmath>
  17 #define floor cln_floor
  18
  19 // MacOS X does "#define _R 0x00040000L".  Grr...
  20 #undef _R
  21
  22 namespace cln {
  23
  24 // Algorithm 1 (for very small p only):
  25 // Try different values.
  26 // Assume p is prime and a nonzero square in Z/pZ.
  27 static uint32 search_sqrt (uint32 p, uint32 a)
  28 {
  29         var uint32 x = 1;
  30         var uint32 x2 = 1;
  31         loop {
  32                 // 0 < x <= p/2, x2 = x^2 mod p.
  33                 if (x2 == a)
  34                         return x;
  35                 x2 += x; x++; x2 += x;
  36                 if (x2 >= p)
  37                         x2 -= p;
  38         }
  39 }
  40
  41 // Algorithm 2 (for p > 2 only):
  42 // Cantor-Zassenhaus.
  43 // [Beth et al.: Computer Algebra, 1988, Kapitel 5.3.3.]
  44 // [Cohen, A Course in Computational Algebraic Number Theory,
  45 //  Section 3.4.4., Algorithm 3.4.6.]
  46 // Input: R = Z/pZ with p>2, and a (nonzero square in R).
// Forward declaration only; the definition follows the helper types
// pol2 and pol2ring declared right below.
static const sqrt_mod_p_t cantor_zassenhaus_sqrt (const cl_modint_ring& R, const cl_MI& a);
  48         // Compute in the polynomial ring R[X]/(X^2-a).
  49         struct pol2 {
  50                 // A polynomial c0+c1*X mod (X^2-a)
  51                 cl_MI c0;
  52                 cl_MI c1;
  53                 // Constructor.
  54                 pol2 (const cl_MI& _c0, const cl_MI& _c1) : c0 (_c0), c1 (_c1) {}
  55         };
  56         struct pol2ring {
  57                 const cl_modint_ring& R;
  58                 const cl_MI& a;
  59                 const pol2 zero ()
  60                 {
  61                         return pol2(R->zero(),R->zero());
  62                 }
  63                 const pol2 one ()
  64                 {
  65                         return pol2(R->one(),R->zero());
  66                 }
  67                 const pol2 plus (const pol2& u, const pol2& v)
  68                 {
  69                         return pol2(u.c0+v.c0, u.c1+v.c1);
  70                 }
  71                 const pol2 minus (const pol2& u, const pol2& v)
  72                 {
  73                         return pol2(u.c0-v.c0, u.c1-v.c1);
  74                 }
  75                 const pol2 mul (const pol2& u, const pol2& v)
  76                 {
  77                         return pol2(u.c0*v.c0+u.c1*v.c1*a, u.c0*v.c1+u.c1*v.c0);
  78                 }
  79                 const pol2 square (const pol2& u)
  80                 {
  81                         return pol2(cln::square(u.c0) + cln::square(u.c1)*a, (u.c0*u.c1)<<1);
  82                 }
  83                 const pol2 expt_pos (const pol2& x, const cl_I& y)
  84                 {
  85                         // Right-Left Binary, [Cohen, Algorithm 1.2.1.]
  86                         var pol2 a = x;
  87                         var cl_I b = y;
  88                         while (!oddp(b)) { a = square(a); b = b = b >> 1; } // a^b = x^y
  89                         var pol2 c = a;
  90                         until (eq(b,1)) {
  91                                 b = b >> 1;
  92                                 a = square(a);
  93                                 // a^b*c = x^y
  94                                 if (oddp(b))
  95                                         c = mul(a,c);
  96                         }
  97                         return c;
  98                 }
  99                 const pol2 random ()
 100                 {
 101                         return pol2(R->random(),R->random());
 102                 }
 103                 // Computes the degree of gcd(u(X),X^2-a) and, if it is 1,
 104                 // also the zero if this polynomial of degree 1.
 105                 struct gcd_result {
 106                         cl_composite_condition* condition;
 107                         int gcd_degree;
 108                         cl_MI solution;
 109                         // Constructors.
 110                         gcd_result (cl_composite_condition* c) : condition (c) {}
 111                         gcd_result (int deg) : condition (NULL), gcd_degree (deg) {}
 112                         gcd_result (int deg, const cl_MI& sol) : condition (NULL), gcd_degree (deg), solution (sol) {}
 113                 };
 114                 const gcd_result gcd (const pol2& u)
 115                 {
 116                         if (zerop(u.c1))
 117                                 // constant polynomial u(X)
 118                                 if (zerop(u.c0))
 119                                         return gcd_result(2);
 120                                 else
 121                                         return gcd_result(0);
 122                         // u(X) = c0 + c1*X has zero -c0/c1.
 123                         var cl_MI_x c1inv = R->recip(u.c1);
 124                         if (c1inv.condition)
 125                                 return c1inv.condition;
 126                         var cl_MI z = -u.c0*c1inv;
 127                         if (cln::square(z) == a)
 128                                 return gcd_result(1,z);
 129                         else
 130                                 return gcd_result(0);
 131                 }
 132                 // Constructor.
 133                 pol2ring (const cl_modint_ring& _R, const cl_MI& _a) : R (_R), a (_a) {}
 134         };
 135 static const sqrt_mod_p_t cantor_zassenhaus_sqrt (const cl_modint_ring& R, const cl_MI& a)
 136 {
 137         var pol2ring PR = pol2ring(R,a);
 138         var cl_I& p = R->modulus;
 139         // Assuming p is a prime, then R[X]/(X^2-a) is the direct product of
 140         // two rings R[X]/(X-sqrt(a)), each being isomorphic to R. Thus taking
 141         // a (p-1)/2-th power in this ring will return one of (0,+1,-1) in
 142         // each ring, with independent probabilities (1/p, (p-1)/2p, (p-1)/2p).
 143         // For any polynomial u(X), setting v(X) := u(X)^((p-1)/2) yields
 144         // gcd(u(X),X^2-a) * gcd(v(X)-1,X^2-a) * gcd(v(X)+1,X^2-a) = X^2-a.
 145         // If p is not prime, all of these gcd's are likely to be 1.
 146         var cl_I e = (p-1) >> 1;
 147         loop {
 148                 // Choose a random polynomial u(X) in the ring.
 149                 var pol2 u = PR.random();
 150                 // Compute v(X) = u(X)^((p-1)/2).
 151                 var pol2 v = PR.expt_pos(u,e);
 152                 // Compute the three gcds.
 153                 var pol2ring::gcd_result g1 = PR.gcd(PR.minus(v,PR.one()));
 154                 if (g1.condition)
 155                         return g1.condition;
 156                 if (g1.gcd_degree == 1)
 157                         return sqrt_mod_p_t(2,g1.solution,-g1.solution);
 158                 if (g1.gcd_degree == 2)
 159                         continue;
 160                 var pol2ring::gcd_result g2 = PR.gcd(PR.plus(v,PR.one()));
 161                 if (g2.condition)
 162                         return g2.condition;
 163                 if (g2.gcd_degree == 1)
 164                         return sqrt_mod_p_t(2,g2.solution,-g2.solution);
 165                 if (g2.gcd_degree == 2)
 166                         continue;
 167                 var pol2ring::gcd_result g3 = PR.gcd(u);
 168                 if (g3.condition)
 169                         return g3.condition;
 170                 if (g3.gcd_degree == 1)
 171                         return sqrt_mod_p_t(2,g3.solution,-g3.solution);
 172                 if (g1.gcd_degree + g2.gcd_degree + g3.gcd_degree < 2)
 173                         // If the sum of the degrees of the gcd is != 2,
 174                         // p cannot be prime.
 175                         return new cl_composite_condition(p);
 176         }
 177 }
 178
// File-scope copy of the property type that tonelli_shanks_sqrt() otherwise
// declares locally. It is compiled only for GCC 2.x on s390; on every other
// configuration the function-local declaration (guarded by the complementary
// condition) is used instead. The nature of the GCC bug is not stated here.
#if defined(__GNUC__) && defined(__s390__) && (__GNUC__ == 2)  // Workaround GCC-bug (see below)
                // Ring property that stores the representative (h.rep) of a
                // cached element h under the symbol k.
                struct cl_sylow2gen_property : public cl_property {
                        SUBCLASS_cl_property();
                public:
                        cl_I h_rep;
                        // Constructor.
                        cl_sylow2gen_property (const cl_symbol& k, const cl_MI& h) : cl_property (k), h_rep (h.rep) {}
                };
#endif
 188
 189 // Algorithm 3 (for p > 2 only):
 190 // Tonelli-Shanks.
 191 // [Cohen, A Course in Computational Algebraic Number Theory,
 192 //  Section 1.5.1., Algorithm 1.5.1.]
 193 static const sqrt_mod_p_t tonelli_shanks_sqrt (const cl_modint_ring& R, const cl_MI& a)
 194 {
 195         // Idea:
 196         // Write p-1 = 2^e*m, m odd. G = (Z/pZ)^* (cyclic of order p-1) has
 197         // subgroups G_0 < G_1 < ... < G_e, G_j of order 2^j. (G_e is called
 198         // the "2-Sylow subgroup" of G.) More precisely
 199         //          G_j = { x in (Z/pZ)^* : x^(2^j) = 1 },
 200         //        G/G_j = { x^(2^j) : x in (Z/pZ)^* }.
 201         // We compute the square root of a first in G/G_e, then lift it to
 202         // G/G_(e-1), etc., up to G/G_0.
 203         // Start with b = a^((m+1)/2), then (a^-1*b^2)^(2^e) = 1, i.e.
 204         // a = b^2 in G/G_e.
 205         // Lifting from G/G_j to G/G_(j-1) is easy: Assume a = b^2 in G/G_j.
 206         // If a = b^2 in G/G_(j-1), then nothing needs to be done. Else
 207         // a^-1*b^2 is in G_j \ G_(j-1). If j=e, a^-1*b^2 is a non-square
 208         // mod p, hence a is a non-square as well, contradiction. If j<e,
 209         // take h in G_(j+1) \ G_j, so that h^2 in G_j \ G_(j-1), and
 210         // a^-1*b^2*h^2 is in G_(j-1). So multiply b with h.
 211         var cl_I& p = R->modulus;
 212         var uintL e = ord2(p-1);
 213         var cl_I m = (p-1) >> e;
 214         // p-1 = 2^e*m, m odd.
 215         // We will have the invariant c = a^-1*b^2 in G/G_j.
 216         var uintL j = e;
 217         // Initialize b = a^((m+1)/2), c = a^m, but avoid to divide by a.
 218         var cl_MI c = R->expt_pos(a,(m-1)>>1);
 219         var cl_MI b = R->mul(a,c);
 220         c = R->mul(b,c);
 221         // Find h in G_e \ G_(e-1): h = h'^m, where h' is any non-square.
 222         var cl_MI h;
 223         if (e==1)
 224                 h = - R->one();
 225         else {
 226                 // Since this computation is a bit costly, we cache its result
 227                 // on the ring's property list.
 228                 static const cl_symbol key = (cl_symbol)(cl_string)"generator of 2-Sylow subgroup of (Z/pZ)^*";
 229 #if !(defined(__GNUC__) && defined(__s390__) && (__GNUC__ == 2))  // Workaround GCC-bug (see above)
 230                 struct cl_sylow2gen_property : public cl_property {
 231                         SUBCLASS_cl_property();
 232                 public:
 233                         cl_I h_rep;
 234                         // Constructor.
 235                         cl_sylow2gen_property (const cl_symbol& k, const cl_MI& h) : cl_property (k), h_rep (h.rep) {}
 236                 };
 237 #endif
 238                 var cl_sylow2gen_property* prop = (cl_sylow2gen_property*) R->get_property(key);
 239                 if (prop)
 240                         h = cl_MI(R,prop->h_rep);
 241                 else {
 242                         do { h = R->random(); }
 243                            until (jacobi(R->retract(h),p) == -1);
 244                         h = R->expt_pos(h,m);
 245                         R->add_property(new cl_sylow2gen_property(key,h));
 246                 }
 247         }
 248         do {
 249                 // Now c = a^-1*b^2 in G_j, h in G_j \ G_(j-1).
 250                 // Determine the smallest i such that c in G_i.
 251                 var uintL i = 0;
 252                 var cl_MI ci = c; // c_i = c^(2^i)
 253                 for ( ; i < j; i++, ci = R->square(ci))
 254                         if (ci == R->one())
 255                                 break;
 256                 if (i==j)
 257                         // Some problem: if j=e, a non-square, if j<e, the
 258                         // previous iteration didn't do its job correctly.
 259                         // Indicates that p is not prime.
 260                         return new cl_composite_condition(p);
 261                 // OK, i < j.
 262                 for (var uintL count = j-i-1; count > 0; count--)
 263                         h = R->square(h);
 264                 // Now h in G_(i+1) \ G_i.
 265                 b = R->mul(b,h);
 266                 h = R->square(h);
 267                 c = R->mul(c,h);
 268                 // Now c = a^-1*b^2 in G_(i-1), h in G_i \ G_(i-1).
 269                 j = i;
 270         } while (j > 0);
 271         if (R->square(b) != a)
 272                 // Problem again.
 273                 return new cl_composite_condition(p);
 274         return sqrt_mod_p_t(2,b,-b);
 275 }
 276
 277 // Break-Even-Points (on an i486 with 33 MHz):
 278 // Algorithm 1 fastest for p < 1500..2000
 279 // Algorithm 3 generally fastest for p > 2000.
 280 // But the running time of algorithm 3 is proportional to e^2.
 281 // For large e, algorithm 2 becomes faster.
 282 // l=50 bits: for e >= 40
 283 // l=100 bits: for e >= 55
 284 // l=200 bits: for e >= 80
 285 // l=400 bits: for e >= 130
 286 // in general something like  e > l/(log(l)/(2*log(2))-1).
 287
 288 const sqrt_mod_p_t sqrt_mod_p (const cl_modint_ring& R, const cl_MI& a)
 289 {
 290         if (!(a.ring() == R)) cl_abort();
 291         var cl_I& p = R->modulus;
 292         var cl_I aa = R->retract(a);
 293         switch (jacobi(aa,p)) {
 294                 case -1: // no solution
 295                         return sqrt_mod_p_t(0);
 296                 case 0: // gcd(aa,p) > 1
 297                         if (zerop(a))
 298                                 // one solution
 299                                 return sqrt_mod_p_t(1,a);
 300                         else
 301                                 // found factor of p
 302                                 return new cl_composite_condition(p,gcd(aa,p));
 303                 case 1: // two solutions
 304                         break;
 305         }
 306         if (p < 2000) {
 307                 // Algorithm 1.
 308                 var cl_I x1 = search_sqrt(cl_I_to_UL(p),cl_I_to_UL(aa));
 309                 var cl_I x2 = p-x1;
 310                 if (x1==x2) // can only happen when p = 2
 311                         return sqrt_mod_p_t(1,R->canonhom(x1));
 312                 else
 313                         return sqrt_mod_p_t(2,R->canonhom(x1),R->canonhom(x2));
 314         }
 315         var uintL l = integer_length(p);
 316         var uintL e = ord2(p-1);
 317         //if (e > 30 && e > l/(log((double)l)*0.72-1))
 318         if (e > 30 && e > l/(::log((double)l)*0.92-2.41))
 319                 // Algorithm 2.
 320                 return cantor_zassenhaus_sqrt(R,a);
 321         else
 322                 // Algorithm 3.
 323                 return tonelli_shanks_sqrt(R,a);
 324 }
 325
 326 }  // namespace cln