ginac/factor.cpp

   1 /** @file factor.cpp
   2  *
   3  *  Polynomial factorization (implementation).
   4  *
   5  *  The interface function factor() at the end of this file is defined in the
   6  *  GiNaC namespace. All other utility functions and classes are defined in an
   7  *  additional anonymous namespace.
   8  *
   9  *  Factorization starts by doing a square free factorization and making the
  10  *  coefficients integer. Then, depending on the number of free variables it
  11  *  proceeds either in dedicated univariate or multivariate factorization code.
  12  *
  13  *  Univariate factorization does a modular factorization via Berlekamp's
  14  *  algorithm and distinct degree factorization. Hensel lifting is used at the
  15  *  end.
  16  *
  17  *  Multivariate factorization uses the univariate factorization (applying an
  18  *  evaluation homomorphism first) and Hensel lifting raises the answer to the
  19  *  multivariate domain. The Hensel lifting code is completely distinct from the
  20  *  code used by the univariate factorization.
  21  *
  22  *  Algorithms used can be found in
  23  *    [Wan] An Improved Multivariate Polynomial Factoring Algorithm,
  24  *          P.S.Wang,
  25  *          Mathematics of Computation, Vol. 32, No. 144 (1978) 1215--1231.
  26  *    [GCL] Algorithms for Computer Algebra,
  27  *          K.O.Geddes, S.R.Czapor, G.Labahn,
  28  *          Springer Verlag, 1992.
  29  *    [Mig] Some Useful Bounds,
  30  *          M.Mignotte,
  31  *          In "Computer Algebra, Symbolic and Algebraic Computation" (B.Buchberger et al., eds.),
  32  *          pp. 259-263, Springer-Verlag, New York, 1982.
  33  */
  34
  35 /*
  36  *  GiNaC Copyright (C) 1999-2022 Johannes Gutenberg University Mainz, Germany
  37  *
  38  *  This program is free software; you can redistribute it and/or modify
  39  *  it under the terms of the GNU General Public License as published by
  40  *  the Free Software Foundation; either version 2 of the License, or
  41  *  (at your option) any later version.
  42  *
  43  *  This program is distributed in the hope that it will be useful,
  44  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  45  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  46  *  GNU General Public License for more details.
  47  *
  48  *  You should have received a copy of the GNU General Public License
  49  *  along with this program; if not, write to the Free Software
  50  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  51  */
  52
  53 //#define DEBUGFACTOR
  54
  55 #include "factor.h"
  56
  57 #include "ex.h"
  58 #include "numeric.h"
  59 #include "operators.h"
  60 #include "inifcns.h"
  61 #include "symbol.h"
  62 #include "relational.h"
  63 #include "power.h"
  64 #include "mul.h"
  65 #include "normal.h"
  66 #include "add.h"
  67
  68 #include <type_traits>
  69 #include <algorithm>
  70 #include <limits>
  71 #include <list>
  72 #include <vector>
  73 #include <stack>
  74 #ifdef DEBUGFACTOR
  75 #include <ostream>
  76 #endif
  77 using namespace std;
  78
  79 #include <cln/cln.h>
  80 using namespace cln;
  81
  82 namespace GiNaC {
  83
  84 // anonymous namespace to hide all utility functions
  85 namespace {
  86
  87 #ifdef DEBUGFACTOR
  // Debug output helpers (active only when DEBUGFACTOR is defined):
  //   DCOUT(str)       prints the argument text itself,
  //   DCOUTVAR(var)    prints the variable's name followed by its value,
  //   DCOUT2(str,var)  prints the given label text followed by the value of var.
  88 #define DCOUT(str) cout << #str << endl
  89 #define DCOUTVAR(var) cout << #var << ": " << var << endl
  90 #define DCOUT2(str,var) cout << #str << ": " << var << endl
  91 ostream& operator<<(ostream& o, const vector<int>& v)
  92 {
  93         auto i = v.begin(), end = v.end();
  94         while ( i != end ) {
  95                 o << *i << " ";
  96                 ++i;
  97         }
  98         return o;
  99 }
 100 static ostream& operator<<(ostream& o, const vector<cl_I>& v)
 101 {
 102         auto i = v.begin(), end = v.end();
 103         while ( i != end ) {
 104                 o << *i << "[" << i-v.begin() << "]" << " ";
 105                 ++i;
 106         }
 107         return o;
 108 }
 109 static ostream& operator<<(ostream& o, const vector<cl_MI>& v)
 110 {
 111         auto i = v.begin(), end = v.end();
 112         while ( i != end ) {
 113                 o << *i << "[" << i-v.begin() << "]" << " ";
 114                 ++i;
 115         }
 116         return o;
 117 }
 118 ostream& operator<<(ostream& o, const vector<numeric>& v)
 119 {
 120         for ( size_t i=0; i<v.size(); ++i ) {
 121                 o << v[i] << " ";
 122         }
 123         return o;
 124 }
 125 ostream& operator<<(ostream& o, const vector<vector<cl_MI>>& v)
 126 {
 127         auto i = v.begin(), end = v.end();
 128         while ( i != end ) {
 129                 o << i-v.begin() << ": " << *i << endl;
 130                 ++i;
 131         }
 132         return o;
 133 }
 134 #else
 // Without DEBUGFACTOR the debug output macros expand to nothing.
 135 #define DCOUT(str)
 136 #define DCOUTVAR(var)
 137 #define DCOUT2(str,var)
 138 #endif // def DEBUGFACTOR
 139
 140 ////////////////////////////////////////////////////////////////////////////////
 141 // modular univariate polynomial code
 142
 143 typedef std::vector<cln::cl_MI> umodpoly;
 144 typedef std::vector<cln::cl_I> upoly;
 145 typedef vector<umodpoly> upvec;
 146
 147
 148 // COPY FROM UPOLY.H
 149
 150 // CHANGED size_t -> int !!!
 151 template<typename T> static int degree(const T& p)
 152 {
 153         return p.size() - 1;
 154 }
 155
 156 template<typename T> static typename T::value_type lcoeff(const T& p)
 157 {
 158         return p[p.size() - 1];
 159 }
 160
 161 static bool normalize_in_field(umodpoly& a)
 162 {
 163         if (a.size() == 0)
 164                 return true;
 165         if ( lcoeff(a) == a[0].ring()->one() ) {
 166                 return true;
 167         }
 168
 169         const cln::cl_MI lc_1 = recip(lcoeff(a));
 170         for (std::size_t k = a.size(); k-- != 0; )
 171                 a[k] = a[k]*lc_1;
 172         return false;
 173 }
 174
 175 template<typename T> static void
 176 canonicalize(T& p, const typename T::size_type hint = std::numeric_limits<typename T::size_type>::max())
 177 {
 178         if (p.empty())
 179                 return;
 180
 181         std::size_t i = p.size() - 1;
 182         // Be fast if the polynomial is already canonicalized
 183         if (!zerop(p[i]))
 184                 return;
 185
 186         if (hint < p.size())
 187                 i = hint;
 188
 189         bool is_zero = false;
 190         do {
 191                 if (!zerop(p[i])) {
 192                         ++i;
 193                         break;
 194                 }
 195                 if (i == 0) {
 196                         is_zero = true;
 197                         break;
 198                 }
 199                 --i;
 200         } while (true);
 201
 202         if (is_zero) {
 203                 p.clear();
 204                 return;
 205         }
 206
 207         p.erase(p.begin() + i, p.end());
 208 }
 209
 210 // END COPY FROM UPOLY.H
 211
 212 static void expt_pos(umodpoly& a, unsigned int q)
 213 {
 214         if ( a.empty() ) return;
 215         cl_MI zero = a[0].ring()->zero();
 216         int deg = degree(a);
 217         a.resize(degree(a)*q+1, zero);
 218         for ( int i=deg; i>0; --i ) {
 219                 a[i*q] = a[i];
 220                 a[i] = zero;
 221         }
 222 }
 223
 224 template<typename T> struct uvar_poly_p
 225 {
 226         static const bool value = false;
 227 };
 228
 229 template<> struct uvar_poly_p<upoly>
 230 {
 231         static const bool value = true;
 232 };
 233
 234 template<> struct uvar_poly_p<umodpoly>
 235 {
 236         static const bool value = true;
 237 };
 238
 239 template<typename T>
 240 // Don't define this for anything but univariate polynomials.
 241 static typename enable_if<uvar_poly_p<T>::value, T>::type
 242 operator+(const T& a, const T& b)
 243 {
 244         int sa = a.size();
 245         int sb = b.size();
 246         if ( sa >= sb ) {
 247                 T r(sa);
 248                 int i = 0;
 249                 for ( ; i<sb; ++i ) {
 250                         r[i] = a[i] + b[i];
 251                 }
 252                 for ( ; i<sa; ++i ) {
 253                         r[i] = a[i];
 254                 }
 255                 canonicalize(r);
 256                 return r;
 257         }
 258         else {
 259                 T r(sb);
 260                 int i = 0;
 261                 for ( ; i<sa; ++i ) {
 262                         r[i] = a[i] + b[i];
 263                 }
 264                 for ( ; i<sb; ++i ) {
 265                         r[i] = b[i];
 266                 }
 267                 canonicalize(r);
 268                 return r;
 269         }
 270 }
 271
 272 template<typename T>
 273 // Don't define this for anything but univariate polynomials. Otherwise
 274 // overload resolution might fail (this actually happens when compiling
 275 // GiNaC with g++ 3.4).
 276 static typename enable_if<uvar_poly_p<T>::value, T>::type
 277 operator-(const T& a, const T& b)
 278 {
 279         int sa = a.size();
 280         int sb = b.size();
 281         if ( sa >= sb ) {
 282                 T r(sa);
 283                 int i = 0;
 284                 for ( ; i<sb; ++i ) {
 285                         r[i] = a[i] - b[i];
 286                 }
 287                 for ( ; i<sa; ++i ) {
 288                         r[i] = a[i];
 289                 }
 290                 canonicalize(r);
 291                 return r;
 292         }
 293         else {
 294                 T r(sb);
 295                 int i = 0;
 296                 for ( ; i<sa; ++i ) {
 297                         r[i] = a[i] - b[i];
 298                 }
 299                 for ( ; i<sb; ++i ) {
 300                         r[i] = -b[i];
 301                 }
 302                 canonicalize(r);
 303                 return r;
 304         }
 305 }
 306
 307 static upoly operator*(const upoly& a, const upoly& b)
 308 {
 309         upoly c;
 310         if ( a.empty() || b.empty() ) return c;
 311
 312         int n = degree(a) + degree(b);
 313         c.resize(n+1, 0);
 314         for ( int i=0 ; i<=n; ++i ) {
 315                 for ( int j=0 ; j<=i; ++j ) {
 316                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 317                         c[i] = c[i] + a[j] * b[i-j];
 318                 }
 319         }
 320         canonicalize(c);
 321         return c;
 322 }
 323
 324 static umodpoly operator*(const umodpoly& a, const umodpoly& b)
 325 {
 326         umodpoly c;
 327         if ( a.empty() || b.empty() ) return c;
 328
 329         int n = degree(a) + degree(b);
 330         c.resize(n+1, a[0].ring()->zero());
 331         for ( int i=0 ; i<=n; ++i ) {
 332                 for ( int j=0 ; j<=i; ++j ) {
 333                         if ( j > degree(a) || (i-j) > degree(b) ) continue;
 334                         c[i] = c[i] + a[j] * b[i-j];
 335                 }
 336         }
 337         canonicalize(c);
 338         return c;
 339 }
 340
 341 static upoly operator*(const upoly& a, const cl_I& x)
 342 {
 343         if ( zerop(x) ) {
 344                 upoly r;
 345                 return r;
 346         }
 347         upoly r(a.size());
 348         for ( size_t i=0; i<a.size(); ++i ) {
 349                 r[i] = a[i] * x;
 350         }
 351         return r;
 352 }
 353
 354 static upoly operator/(const upoly& a, const cl_I& x)
 355 {
 356         if ( zerop(x) ) {
 357                 upoly r;
 358                 return r;
 359         }
 360         upoly r(a.size());
 361         for ( size_t i=0; i<a.size(); ++i ) {
 362                 r[i] = exquo(a[i],x);
 363         }
 364         return r;
 365 }
 366
 367 static umodpoly operator*(const umodpoly& a, const cl_MI& x)
 368 {
 369         umodpoly r(a.size());
 370         for ( size_t i=0; i<a.size(); ++i ) {
 371                 r[i] = a[i] * x;
 372         }
 373         canonicalize(r);
 374         return r;
 375 }
 376
 377 static void upoly_from_ex(upoly& up, const ex& e, const ex& x)
 378 {
 379         // assert: e is in Z[x]
 380         int deg = e.degree(x);
 381         up.resize(deg+1);
 382         int ldeg = e.ldegree(x);
 383         for ( ; deg>=ldeg; --deg ) {
 384                 up[deg] = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 385         }
 386         for ( ; deg>=0; --deg ) {
 387                 up[deg] = 0;
 388         }
 389         canonicalize(up);
 390 }
 391
 392 static void umodpoly_from_upoly(umodpoly& ump, const upoly& e, const cl_modint_ring& R)
 393 {
 394         int deg = degree(e);
 395         ump.resize(deg+1);
 396         for ( ; deg>=0; --deg ) {
 397                 ump[deg] = R->canonhom(e[deg]);
 398         }
 399         canonicalize(ump);
 400 }
 401
 402 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_modint_ring& R)
 403 {
 404         // assert: e is in Z[x]
 405         int deg = e.degree(x);
 406         ump.resize(deg+1);
 407         int ldeg = e.ldegree(x);
 408         for ( ; deg>=ldeg; --deg ) {
 409                 cl_I coeff = the<cl_I>(ex_to<numeric>(e.coeff(x, deg)).to_cl_N());
 410                 ump[deg] = R->canonhom(coeff);
 411         }
 412         for ( ; deg>=0; --deg ) {
 413                 ump[deg] = R->zero();
 414         }
 415         canonicalize(ump);
 416 }
 417
 418 #ifdef DEBUGFACTOR
 419 static void umodpoly_from_ex(umodpoly& ump, const ex& e, const ex& x, const cl_I& modulus)
 420 {
 421         umodpoly_from_ex(ump, e, x, find_modint_ring(modulus));
 422 }
 423 #endif
 424
 425 static ex upoly_to_ex(const upoly& a, const ex& x)
 426 {
 427         if ( a.empty() ) return 0;
 428         ex e;
 429         for ( int i=degree(a); i>=0; --i ) {
 430                 e += numeric(a[i]) * pow(x, i);
 431         }
 432         return e;
 433 }
 434
 435 static ex umodpoly_to_ex(const umodpoly& a, const ex& x)
 436 {
 437         if ( a.empty() ) return 0;
 438         cl_modint_ring R = a[0].ring();
 439         cl_I mod = R->modulus;
 440         cl_I halfmod = (mod-1) >> 1;
 441         ex e;
 442         for ( int i=degree(a); i>=0; --i ) {
 443                 cl_I n = R->retract(a[i]);
 444                 if ( n > halfmod ) {
 445                         e += numeric(n-mod) * pow(x, i);
 446                 } else {
 447                         e += numeric(n) * pow(x, i);
 448                 }
 449         }
 450         return e;
 451 }
 452
 453 static upoly umodpoly_to_upoly(const umodpoly& a)
 454 {
 455         upoly e(a.size());
 456         if ( a.empty() ) return e;
 457         cl_modint_ring R = a[0].ring();
 458         cl_I mod = R->modulus;
 459         cl_I halfmod = (mod-1) >> 1;
 460         for ( int i=degree(a); i>=0; --i ) {
 461                 cl_I n = R->retract(a[i]);
 462                 if ( n > halfmod ) {
 463                         e[i] = n-mod;
 464                 } else {
 465                         e[i] = n;
 466                 }
 467         }
 468         return e;
 469 }
 470
 471 static umodpoly umodpoly_to_umodpoly(const umodpoly& a, const cl_modint_ring& R, unsigned int m)
 472 {
 473         umodpoly e;
 474         if ( a.empty() ) return e;
 475         cl_modint_ring oldR = a[0].ring();
 476         size_t sa = a.size();
 477         e.resize(sa+m, R->zero());
 478         for ( size_t i=0; i<sa; ++i ) {
 479                 e[i+m] = R->canonhom(oldR->retract(a[i]));
 480         }
 481         canonicalize(e);
 482         return e;
 483 }
 484
 485 /** Divides all coefficients of the polynomial a by the integer x.
 486  *  All coefficients are supposed to be divisible by x. If they are not, the
 487  *  the<cl_I> cast will raise an exception.
 488  *
 489  *  @param[in,out] a  polynomial of which the coefficients will be reduced by x
 490  *  @param[in]     x  integer that divides the coefficients
 491  */
 492 static void reduce_coeff(umodpoly& a, const cl_I& x)
 493 {
 494         if ( a.empty() ) return;
 495
 496         cl_modint_ring R = a[0].ring();
 497         for (auto & i : a) {
 498                 // cln cannot perform this division in the modular field
 499                 cl_I c = R->retract(i);
 500                 i = cl_MI(R, the<cl_I>(c / x));
 501         }
 502 }
 503
 504 /** Calculates remainder of a/b.
 505  *  Assertion: a and b not empty.
 506  *
 507  *  @param[in]  a  polynomial dividend
 508  *  @param[in]  b  polynomial divisor
 509  *  @param[out] r  polynomial remainder
 510  */
 511 static void rem(const umodpoly& a, const umodpoly& b, umodpoly& r)
 512 {
 513         int k, n;
 514         n = degree(b);
 515         k = degree(a) - n;
 516         r = a;
 517         if ( k < 0 ) return;
 518
 519         do {
 520                 cl_MI qk = div(r[n+k], b[n]);
 521                 if ( !zerop(qk) ) {
 522                         for ( int i=0; i<n; ++i ) {
 523                                 unsigned int j = n + k - 1 - i;
 524                                 r[j] = r[j] - qk * b[j-k];
 525                         }
 526                 }
 527         } while ( k-- );
 528
 529         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 530         canonicalize(r);
 531 }
 532
 533 /** Calculates quotient of a/b.
 534  *  Assertion: a and b not empty.
 535  *
 536  *  @param[in]  a  polynomial dividend
 537  *  @param[in]  b  polynomial divisor
 538  *  @param[out] q  polynomial quotient
 539  */
 540 static void div(const umodpoly& a, const umodpoly& b, umodpoly& q)
 541 {
 542         int k, n;
 543         n = degree(b);
 544         k = degree(a) - n;
 545         q.clear();
 546         if ( k < 0 ) return;
 547
 548         umodpoly r = a;
 549         q.resize(k+1, a[0].ring()->zero());
 550         do {
 551                 cl_MI qk = div(r[n+k], b[n]);
 552                 if ( !zerop(qk) ) {
 553                         q[k] = qk;
 554                         for ( int i=0; i<n; ++i ) {
 555                                 unsigned int j = n + k - 1 - i;
 556                                 r[j] = r[j] - qk * b[j-k];
 557                         }
 558                 }
 559         } while ( k-- );
 560
 561         canonicalize(q);
 562 }
 563
 564 /** Calculates quotient and remainder of a/b.
 565  *  Assertion: a and b not empty.
 566  *
 567  *  @param[in]  a  polynomial dividend
 568  *  @param[in]  b  polynomial divisor
 569  *  @param[out] r  polynomial remainder
 570  *  @param[out] q  polynomial quotient
 571  */
 572 static void remdiv(const umodpoly& a, const umodpoly& b, umodpoly& r, umodpoly& q)
 573 {
 574         int k, n;
 575         n = degree(b);
 576         k = degree(a) - n;
 577         q.clear();
 578         r = a;
 579         if ( k < 0 ) return;
 580
 581         q.resize(k+1, a[0].ring()->zero());
 582         do {
 583                 cl_MI qk = div(r[n+k], b[n]);
 584                 if ( !zerop(qk) ) {
 585                         q[k] = qk;
 586                         for ( int i=0; i<n; ++i ) {
 587                                 unsigned int j = n + k - 1 - i;
 588                                 r[j] = r[j] - qk * b[j-k];
 589                         }
 590                 }
 591         } while ( k-- );
 592
 593         fill(r.begin()+n, r.end(), a[0].ring()->zero());
 594         canonicalize(r);
 595         canonicalize(q);
 596 }
 597
 598 /** Calculates the GCD of polynomial a and b.
 599  *
 600  *  @param[in]  a  polynomial
 601  *  @param[in]  b  polynomial
 602  *  @param[out] c  GCD
 603  */
 604 static void gcd(const umodpoly& a, const umodpoly& b, umodpoly& c)
 605 {
 606         if ( degree(a) < degree(b) ) return gcd(b, a, c);
 607
 608         c = a;
 609         normalize_in_field(c);
 610         umodpoly d = b;
 611         normalize_in_field(d);
 612         umodpoly r;
 613         while ( !d.empty() ) {
 614                 rem(c, d, r);
 615                 c = d;
 616                 d = r;
 617         }
 618         normalize_in_field(c);
 619 }
 620
 621 /** Calculates the derivative of the polynomial a.
 622  *
 623  *  @param[in]  a  polynomial of which to take the derivative
 624  *  @param[out] d  result/derivative
 625  */
 626 static void deriv(const umodpoly& a, umodpoly& d)
 627 {
 628         d.clear();
 629         if ( a.size() <= 1 ) return;
 630
 631         d.insert(d.begin(), a.begin()+1, a.end());
 632         int max = d.size();
 633         for ( int i=1; i<max; ++i ) {
 634                 d[i] = d[i] * (i+1);
 635         }
 636         canonicalize(d);
 637 }
 638
 639 static bool unequal_one(const umodpoly& a)
 640 {
 641         if ( a.empty() ) return true;
 642         return ( a.size() != 1 || a[0] != a[0].ring()->one() );
 643 }
 644
 645 static bool equal_one(const umodpoly& a)
 646 {
 647         return ( a.size() == 1 && a[0] == a[0].ring()->one() );
 648 }
 649
 650 /** Returns true if polynomial a is square free.
 651  *
 652  *  @param[in] a  polynomial to check
 653  *  @return       true if polynomial is square free, false otherwise
 654  */
 655 static bool squarefree(const umodpoly& a)
 656 {
 657         umodpoly b;
 658         deriv(a, b);
 659         if ( b.empty() ) {
 660                 return false;
 661         }
 662         umodpoly c;
 663         gcd(a, b, c);
 664         return equal_one(c);
 665 }
 666
 667 // END modular univariate polynomial code
 668 ////////////////////////////////////////////////////////////////////////////////
 669
 670 ////////////////////////////////////////////////////////////////////////////////
 671 // modular matrix
 672
 673 typedef vector<cl_MI> mvec;
 674
 675 class modular_matrix
 676 {
 677 #ifdef DEBUGFACTOR
 678         friend ostream& operator<<(ostream& o, const modular_matrix& m);
 679 #endif
 680 public:
 681         modular_matrix(size_t r_, size_t c_, const cl_MI& init) : r(r_), c(c_)
 682         {
 683                 m.resize(c*r, init);
 684         }
 685         size_t rowsize() const { return r; }
 686         size_t colsize() const { return c; }
 687         cl_MI& operator()(size_t row, size_t col) { return m[row*c + col]; }
 688         cl_MI operator()(size_t row, size_t col) const { return m[row*c + col]; }
 689         void mul_col(size_t col, const cl_MI x)
 690         {
 691                 for ( size_t rc=0; rc<r; ++rc ) {
 692                         std::size_t i = c*rc + col;
 693                         m[i] = m[i] * x;
 694                 }
 695         }
 696         void sub_col(size_t col1, size_t col2, const cl_MI fac)
 697         {
 698                 for ( size_t rc=0; rc<r; ++rc ) {
 699                         std::size_t i1 = col1 + c*rc;
 700                         std::size_t i2 = col2 + c*rc;
 701                         m[i1] = m[i1] - m[i2]*fac;
 702                 }
 703         }
 704         void switch_col(size_t col1, size_t col2)
 705         {
 706                 for ( size_t rc=0; rc<r; ++rc ) {
 707                         std::size_t i1 = col1 + rc*c;
 708                         std::size_t i2 = col2 + rc*c;
 709                         std::swap(m[i1], m[i2]);
 710                 }
 711         }
 712         void mul_row(size_t row, const cl_MI x)
 713         {
 714                 for ( size_t cc=0; cc<c; ++cc ) {
 715                         std::size_t i = row*c + cc;
 716                         m[i] = m[i] * x;
 717                 }
 718         }
 719         void sub_row(size_t row1, size_t row2, const cl_MI fac)
 720         {
 721                 for ( size_t cc=0; cc<c; ++cc ) {
 722                         std::size_t i1 = row1*c + cc;
 723                         std::size_t i2 = row2*c + cc;
 724                         m[i1] = m[i1] - m[i2]*fac;
 725                 }
 726         }
 727         void switch_row(size_t row1, size_t row2)
 728         {
 729                 for ( size_t cc=0; cc<c; ++cc ) {
 730                         std::size_t i1 = row1*c + cc;
 731                         std::size_t i2 = row2*c + cc;
 732                         std::swap(m[i1], m[i2]);
 733                 }
 734         }
 735         bool is_col_zero(size_t col) const
 736         {
 737                 for ( size_t rr=0; rr<r; ++rr ) {
 738                         std::size_t i = col + rr*c;
 739                         if ( !zerop(m[i]) ) {
 740                                 return false;
 741                         }
 742                 }
 743                 return true;
 744         }
 745         bool is_row_zero(size_t row) const
 746         {
 747                 for ( size_t cc=0; cc<c; ++cc ) {
 748                         std::size_t i = row*c + cc;
 749                         if ( !zerop(m[i]) ) {
 750                                 return false;
 751                         }
 752                 }
 753                 return true;
 754         }
 755         void set_row(size_t row, const vector<cl_MI>& newrow)
 756         {
 757                 for (std::size_t i2 = 0; i2 < newrow.size(); ++i2) {
 758                         std::size_t i1 = row*c + i2;
 759                         m[i1] = newrow[i2];
 760                 }
 761         }
 762         mvec::const_iterator row_begin(size_t row) const { return m.begin()+row*c; }
 763         mvec::const_iterator row_end(size_t row) const { return m.begin()+row*c+r; }
 764 private:
 765         size_t r, c;
 766         mvec m;
 767 };
 768
 769 #ifdef DEBUGFACTOR
 770 modular_matrix operator*(const modular_matrix& m1, const modular_matrix& m2)
 771 {
 772         const unsigned int r = m1.rowsize();
 773         const unsigned int c = m2.colsize();
 774         modular_matrix o(r,c,m1(0,0));
 775
 776         for ( size_t i=0; i<r; ++i ) {
 777                 for ( size_t j=0; j<c; ++j ) {
 778                         cl_MI buf;
 779                         buf = m1(i,0) * m2(0,j);
 780                         for ( size_t k=1; k<c; ++k ) {
 781                                 buf = buf + m1(i,k)*m2(k,j);
 782                         }
 783                         o(i,j) = buf;
 784                 }
 785         }
 786         return o;
 787 }
 788
 789 ostream& operator<<(ostream& o, const modular_matrix& m)
 790 {
 791         cl_modint_ring R = m(0,0).ring();
 792         o << "{";
 793         for ( size_t i=0; i<m.rowsize(); ++i ) {
 794                 o << "{";
 795                 for ( size_t j=0; j<m.colsize()-1; ++j ) {
 796                         o << R->retract(m(i,j)) << ",";
 797                 }
 798                 o << R->retract(m(i,m.colsize()-1)) << "}";
 799                 if ( i != m.rowsize()-1 ) {
 800                         o << ",";
 801                 }
 802         }
 803         o << "}";
 804         return o;
 805 }
 806 #endif // def DEBUGFACTOR
 807
 808 // END modular matrix
 809 ////////////////////////////////////////////////////////////////////////////////
 810
 811 /** Calculates the Q matrix for a polynomial. Used by Berlekamp's algorithm.
 812  *
 813  *  The implementation follows algorithm 8.5 of [GCL].
 814  *
 815  *  @param[in]  a_  modular polynomial
 816  *  @param[out] Q   Q matrix
 817  */
 818 static void q_matrix(const umodpoly& a_, modular_matrix& Q)
 819 {
 820         umodpoly a = a_;
 821         normalize_in_field(a);
 822
 823         int n = degree(a);
 824         unsigned int q = cl_I_to_uint(a[0].ring()->modulus);
 825         umodpoly r(n, a[0].ring()->zero());
 826         r[0] = a[0].ring()->one();
 827         Q.set_row(0, r);
 828         unsigned int max = (n-1) * q;
 829         for ( size_t m=1; m<=max; ++m ) {
 830                 cl_MI rn_1 = r.back();
 831                 for ( size_t i=n-1; i>0; --i ) {
 832                         r[i] = r[i-1] - (rn_1 * a[i]);
 833                 }
 834                 r[0] = -rn_1 * a[0];
 835                 if ( (m % q) == 0 ) {
 836                         Q.set_row(m/q, r);
 837                 }
 838         }
 839 }
 840
 841 /** Determine the nullspace of a matrix M-1.
 842  *
 843  *  @param[in,out] M      matrix, will be modified
 844  *  @param[out]    basis  calculated nullspace of M-1
 845  */
 846 static void nullspace(modular_matrix& M, vector<mvec>& basis)
 847 {
 848         const size_t n = M.rowsize();
 849         const cl_MI one = M(0,0).ring()->one();
 850         for ( size_t i=0; i<n; ++i ) {
 851                 M(i,i) = M(i,i) - one;
 852         }
 853         for ( size_t r=0; r<n; ++r ) {
 854                 size_t cc = 0;
 855                 for ( ; cc<n; ++cc ) {
 856                         if ( !zerop(M(r,cc)) ) {
 857                                 if ( cc < r ) {
 858                                         if ( !zerop(M(cc,cc)) ) {
 859                                                 continue;
 860                                         }
 861                                         M.switch_col(cc, r);
 862                                 }
 863                                 else if ( cc > r ) {
 864                                         M.switch_col(cc, r);
 865                                 }
 866                                 break;
 867                         }
 868                 }
 869                 if ( cc < n ) {
 870                         M.mul_col(r, recip(M(r,r)));
 871                         for ( cc=0; cc<n; ++cc ) {
 872                                 if ( cc != r ) {
 873                                         M.sub_col(cc, r, M(r,cc));
 874                                 }
 875                         }
 876                 }
 877         }
 878
 879         for ( size_t i=0; i<n; ++i ) {
 880                 M(i,i) = M(i,i) - one;
 881         }
 882         for ( size_t i=0; i<n; ++i ) {
 883                 if ( !M.is_row_zero(i) ) {
 884                         mvec nu(M.row_begin(i), M.row_end(i));
 885                         basis.push_back(nu);
 886                 }
 887         }
 888 }
 889
 890 /** Berlekamp's modular factorization.
 891  *
 892  *  The implementation follows algorithm 8.4 of [GCL].
 893  *
 894  *  @param[in]  a    modular polynomial
 895  *  @param[out] upv  vector containing modular factors. if upv was not empty the
 896  *                   new elements are added at the end
 897  */
 898 static void berlekamp(const umodpoly& a, upvec& upv)
 899 {
 900         cl_modint_ring R = a[0].ring();
 901         umodpoly one(1, R->one());
 902
 903         // find nullspace of Q matrix
 904         modular_matrix Q(degree(a), degree(a), R->zero());
 905         q_matrix(a, Q);
 906         vector<mvec> nu;
 907         nullspace(Q, nu);
 908
 909         const unsigned int k = nu.size();
 910         if ( k == 1 ) {
 911                 // irreducible
 912                 return;
 913         }
 914
 915         list<umodpoly> factors = {a};
 916         unsigned int size = 1;
 917         unsigned int r = 1;
 918         unsigned int q = cl_I_to_uint(R->modulus);
 919
 920         list<umodpoly>::iterator u = factors.begin();
 921
 922         // calculate all gcd's
 923         while ( true ) {
 924                 for ( unsigned int s=0; s<q; ++s ) {
 925                         umodpoly nur = nu[r];
 926                         nur[0] = nur[0] - cl_MI(R, s);
 927                         canonicalize(nur);
 928                         umodpoly g;
 929                         gcd(nur, *u, g);
 930                         if ( unequal_one(g) && g != *u ) {
 931                                 umodpoly uo;
 932                                 div(*u, g, uo);
 933                                 if ( equal_one(uo) ) {
 934                                         throw logic_error("berlekamp: unexpected divisor.");
 935                                 } else {
 936                                         *u = uo;
 937                                 }
 938                                 factors.push_back(g);
 939                                 size = 0;
 940                                 for (auto & i : factors) {
 941                                         if (degree(i))
 942                                                 ++size;
 943                                 }
 944                                 if ( size == k ) {
 945                                         for (auto & i : factors) {
 946                                                 upv.push_back(i);
 947                                         }
 948                                         return;
 949                                 }
 950                         }
 951                 }
 952                 if ( ++r == k ) {
 953                         r = 1;
 954                         ++u;
 955                 }
 956         }
 957 }
 958
 959 // modular square free factorization is not used at the moment so we deactivate
 960 // the code
 961 #if 0
 962
 963 /** Calculates a^(1/prime).
 964  *
 965  *  @param[in] a      polynomial
 966  *  @param[in] prime  prime number -> exponent 1/prime
 967  *  @param[in] ap     resulting polynomial
 968  */
 969 static void expt_1_over_p(const umodpoly& a, unsigned int prime, umodpoly& ap)
 970 {
 971         size_t newdeg = degree(a)/prime;
 972         ap.resize(newdeg+1);
 973         ap[0] = a[0];
 974         for ( size_t i=1; i<=newdeg; ++i ) {
 975                 ap[i] = a[i*prime];
 976         }
 977 }
 978
 979 /** Modular square free factorization.
 980  *
 981  *  @param[in]  a        polynomial
 982  *  @param[out] factors  modular factors
 983  *  @param[out] mult     corresponding multiplicities (exponents)
 984  */
 985 static void modsqrfree(const umodpoly& a, upvec& factors, vector<int>& mult)
 986 {
 987         const unsigned int prime = cl_I_to_uint(a[0].ring()->modulus);
 988         int i = 1;
 989         umodpoly b;
 990         deriv(a, b);
 991         if ( b.size() ) {
 992                 umodpoly c;
 993                 gcd(a, b, c);
 994                 umodpoly w;
 995                 div(a, c, w);
 996                 while ( unequal_one(w) ) {
 997                         umodpoly y;
 998                         gcd(w, c, y);
 999                         umodpoly z;
1000                         div(w, y, z);
1001                         factors.push_back(z);
1002                         mult.push_back(i);
1003                         ++i;
1004                         w = y;
1005                         umodpoly buf;
1006                         div(c, y, buf);
1007                         c = buf;
1008                 }
1009                 if ( unequal_one(c) ) {
1010                         umodpoly cp;
1011                         expt_1_over_p(c, prime, cp);
1012                         size_t previ = mult.size();
1013                         modsqrfree(cp, factors, mult);
1014                         for ( size_t i=previ; i<mult.size(); ++i ) {
1015                                 mult[i] *= prime;
1016                         }
1017                 }
1018         } else {
1019                 umodpoly ap;
1020                 expt_1_over_p(a, prime, ap);
1021                 size_t previ = mult.size();
1022                 modsqrfree(ap, factors, mult);
1023                 for ( size_t i=previ; i<mult.size(); ++i ) {
1024                         mult[i] *= prime;
1025                 }
1026         }
1027 }
1028
1029 #endif // deactivation of square free factorization
1030
1031 /** Distinct degree factorization (DDF).
1032  *
1033  *  The implementation follows algorithm 8.8 of [GCL].
1034  *
1035  *  @param[in]  a_         modular polynomial
1036  *  @param[out] degrees    vector containing the degrees of the factors of the
1037  *                         corresponding polynomials in ddfactors.
1038  *  @param[out] ddfactors  vector containing polynomials which factors have the
1039  *                         degree given in degrees.
1040  */
1041 static void distinct_degree_factor(const umodpoly& a_, vector<int>& degrees, upvec& ddfactors)
1042 {
1043         umodpoly a = a_;
1044
1045         cl_modint_ring R = a[0].ring();
1046         int q = cl_I_to_int(R->modulus);
1047         int nhalf = degree(a)/2;
1048
1049         int i = 1;
1050         umodpoly w(2);
1051         w[0] = R->zero();
1052         w[1] = R->one();
1053         umodpoly x = w;
1054
1055         while ( i <= nhalf ) {
1056                 expt_pos(w, q);
1057                 umodpoly buf;
1058                 rem(w, a, buf);
1059                 w = buf;
1060                 umodpoly wx = w - x;
1061                 gcd(a, wx, buf);
1062                 if ( unequal_one(buf) ) {
1063                         degrees.push_back(i);
1064                         ddfactors.push_back(buf);
1065                 }
1066                 if ( unequal_one(buf) ) {
1067                         umodpoly buf2;
1068                         div(a, buf, buf2);
1069                         a = buf2;
1070                         nhalf = degree(a)/2;
1071                         rem(w, a, buf);
1072                         w = buf;
1073                 }
1074                 ++i;
1075         }
1076         if ( unequal_one(a) ) {
1077                 degrees.push_back(degree(a));
1078                 ddfactors.push_back(a);
1079         }
1080 }
1081
1082 /** Modular same degree factorization.
1083  *  Same degree factorization is a kind of misnomer. It performs distinct degree
1084  *  factorization, but instead of using the Cantor-Zassenhaus algorithm it
1085  *  (sub-optimally) uses Berlekamp's algorithm for the factors of the same
1086  *  degree.
1087  *
1088  *  @param[in]  a    modular polynomial
1089  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1090  *                   new elements are added at the end
1091  */
1092 static void same_degree_factor(const umodpoly& a, upvec& upv)
1093 {
1094         cl_modint_ring R = a[0].ring();
1095
1096         vector<int> degrees;
1097         upvec ddfactors;
1098         distinct_degree_factor(a, degrees, ddfactors);
1099
1100         for ( size_t i=0; i<degrees.size(); ++i ) {
1101                 if ( degrees[i] == degree(ddfactors[i]) ) {
1102                         upv.push_back(ddfactors[i]);
1103                 } else {
1104                         berlekamp(ddfactors[i], upv);
1105                 }
1106         }
1107 }
1108
// Selects same degree factorization in factor_modular(). Remove this define
// to use plain Berlekamp factorization instead.
1110 #define USE_SAME_DEGREE_FACTOR
1111
1112 /** Modular univariate factorization.
1113  *
1114  *  In principle, we have two algorithms at our disposal: Berlekamp's algorithm
1115  *  and same degree factorization (SDF). SDF seems to be slightly faster in
1116  *  almost all cases so it is activated as default.
1117  *
1118  *  @param[in]  p    modular polynomial
1119  *  @param[out] upv  vector containing modular factors. if upv was not empty the
1120  *                   new elements are added at the end
1121  */
1122 static void factor_modular(const umodpoly& p, upvec& upv)
1123 {
1124 #ifdef USE_SAME_DEGREE_FACTOR
1125         same_degree_factor(p, upv);
1126 #else
1127         berlekamp(p, upv);
1128 #endif
1129 }
1130
1131 /** Calculates modular polynomials s and t such that a*s+b*t==1.
1132  *  Assertion: a and b are relatively prime and not zero.
1133  *
1134  *  @param[in]  a  polynomial
1135  *  @param[in]  b  polynomial
1136  *  @param[out] s  polynomial
1137  *  @param[out] t  polynomial
1138  */
1139 static void exteuclid(const umodpoly& a, const umodpoly& b, umodpoly& s, umodpoly& t)
1140 {
1141         if ( degree(a) < degree(b) ) {
1142                 exteuclid(b, a, t, s);
1143                 return;
1144         }
1145
1146         umodpoly one(1, a[0].ring()->one());
1147         umodpoly c = a; normalize_in_field(c);
1148         umodpoly d = b; normalize_in_field(d);
1149         s = one;
1150         t.clear();
1151         umodpoly d1;
1152         umodpoly d2 = one;
1153         umodpoly q;
1154         while ( true ) {
1155                 div(c, d, q);
1156                 umodpoly r = c - q * d;
1157                 umodpoly r1 = s - q * d1;
1158                 umodpoly r2 = t - q * d2;
1159                 c = d;
1160                 s = d1;
1161                 t = d2;
1162                 if ( r.empty() ) break;
1163                 d = r;
1164                 d1 = r1;
1165                 d2 = r2;
1166         }
1167         cl_MI fac = recip(lcoeff(a) * lcoeff(c));
1168         for (auto & i : s) {
1169                 i = i * fac;
1170         }
1171         canonicalize(s);
1172         fac = recip(lcoeff(b) * lcoeff(c));
1173         for (auto & i : t) {
1174                 i = i * fac;
1175         }
1176         canonicalize(t);
1177 }
1178
1179 /** Replaces the leading coefficient in a polynomial by a given number.
1180  *
1181  *  @param[in] poly  polynomial to change
1182  *  @param[in] lc    new leading coefficient
1183  *  @return          changed polynomial
1184  */
1185 static upoly replace_lc(const upoly& poly, const cl_I& lc)
1186 {
1187         if ( poly.empty() ) return poly;
1188         upoly r = poly;
1189         r.back() = lc;
1190         return r;
1191 }
1192
1193 /** Calculates bound for the product of absolute values (modulus) of the roots.
1194  *  Uses Landau's inequality, see [Mig].
1195  */
1196 static inline cl_I calc_bound(const ex& a, const ex& x)
1197 {
1198         cl_R radicand = 0;
1199         for ( int i=a.degree(x); i>=a.ldegree(x); --i ) {
1200                 cl_I aa = abs(the<cl_I>(ex_to<numeric>(a.coeff(x, i)).to_cl_N()));
1201                 radicand = radicand + square(aa);
1202         }
1203         return ceiling1(the<cl_R>(cln::sqrt(radicand)));
1204 }
1205
1206 /** Calculates bound for the product of absolute values (modulus) of the roots.
1207  *  Uses Landau's inequality, see [Mig].
1208  */
1209 static inline cl_I calc_bound(const upoly& a)
1210 {
1211         cl_R radicand = 0;
1212         for ( int i=degree(a); i>=0; --i ) {
1213                 cl_I aa = abs(a[i]);
1214                 radicand = radicand + square(aa);
1215         }
1216         return ceiling1(the<cl_R>(cln::sqrt(radicand)));
1217 }
1218
1219 /** Hensel lifting as used by factor_univariate().
1220  *
1221  *  The implementation follows algorithm 6.1 of [GCL].
1222  *
1223  *  @param[in]  a_   primitive univariate polynomials
1224  *  @param[in]  p    prime number that does not divide lcoeff(a)
1225  *  @param[in]  u1_  modular factor of a (mod p)
1226  *  @param[in]  w1_  modular factor of a (mod p), relatively prime to u1_,
1227  *                   fulfilling  u1_*w1_ == a mod p
1228  *  @param[out] u    lifted factor
1229  *  @param[out] w    lifted factor, u*w = a
1230  */
1231 static void hensel_univar(const upoly& a_, unsigned int p, const umodpoly& u1_, const umodpoly& w1_, upoly& u, upoly& w)
1232 {
1233         upoly a = a_;
1234         const cl_modint_ring& R = u1_[0].ring();
1235
1236         // calc bound B
1237         int maxdeg = (degree(u1_) > degree(w1_)) ? degree(u1_) : degree(w1_);
1238         cl_I maxmodulus = calc_bound(a) * ash(cl_I(1), maxdeg+1);  // 2 * calc_bound(a) * 2^maxdeg
1239
1240         // step 1
1241         cl_I alpha = lcoeff(a);
1242         a = a * alpha;
1243         umodpoly nu1 = u1_;
1244         normalize_in_field(nu1);
1245         umodpoly nw1 = w1_;
1246         normalize_in_field(nw1);
1247         upoly phi;
1248         phi = umodpoly_to_upoly(nu1) * alpha;
1249         umodpoly u1;
1250         umodpoly_from_upoly(u1, phi, R);
1251         phi = umodpoly_to_upoly(nw1) * alpha;
1252         umodpoly w1;
1253         umodpoly_from_upoly(w1, phi, R);
1254
1255         // step 2
1256         umodpoly s;
1257         umodpoly t;
1258         exteuclid(u1, w1, s, t);
1259
1260         // step 3
1261         u = replace_lc(umodpoly_to_upoly(u1), alpha);
1262         w = replace_lc(umodpoly_to_upoly(w1), alpha);
1263         upoly e = a - u * w;
1264         cl_I modulus = p;
1265
1266         // step 4
1267         while ( !e.empty() && modulus < maxmodulus ) {
1268                 upoly c = e / modulus;
1269                 phi = umodpoly_to_upoly(s) * c;
1270                 umodpoly sigmatilde;
1271                 umodpoly_from_upoly(sigmatilde, phi, R);
1272                 phi = umodpoly_to_upoly(t) * c;
1273                 umodpoly tautilde;
1274                 umodpoly_from_upoly(tautilde, phi, R);
1275                 umodpoly r, q;
1276                 remdiv(sigmatilde, w1, r, q);
1277                 umodpoly sigma = r;
1278                 phi = umodpoly_to_upoly(tautilde) + umodpoly_to_upoly(q) * umodpoly_to_upoly(u1);
1279                 umodpoly tau;
1280                 umodpoly_from_upoly(tau, phi, R);
1281                 u = u + umodpoly_to_upoly(tau) * modulus;
1282                 w = w + umodpoly_to_upoly(sigma) * modulus;
1283                 e = a - u * w;
1284                 modulus = modulus * p;
1285         }
1286
1287         // step 5
1288         if ( e.empty() ) {
1289                 cl_I g = u[0];
1290                 for ( size_t i=1; i<u.size(); ++i ) {
1291                         g = gcd(g, u[i]);
1292                         if ( g == 1 ) break;
1293                 }
1294                 if ( g != 1 ) {
1295                         u = u / g;
1296                         w = w * g;
1297                 }
1298                 if ( alpha != 1 ) {
1299                         w = w / alpha;
1300                 }
1301         } else {
1302                 u.clear();
1303         }
1304 }
1305
/** Returns a new prime number.
 *
 *  The odd primes found so far are kept in a function-local static table that
 *  is extended on demand. Since the table starts with 3, the prime 2 is never
 *  returned.
 *
 *  The table is extended strictly prime by prime, i.e. it never contains
 *  duplicates, so a call with p far beyond the cached range costs one trial
 *  division run per odd number between the end of the table and the result.
 *
 *  @param[in] p  prime number (any other unsigned value is accepted as well)
 *  @return       next prime number after p, i.e. the smallest odd prime > p
 */
static unsigned int next_prime(unsigned int p)
{
	// ascending table of all odd primes up to primes.back()
	static std::vector<unsigned int> primes = {3, 5, 7};

	if ( p < primes.back() ) {
		// the answer is already in the table
		for (const auto & it : primes) {
			if ( it > p ) {
				return it;
			}
		}
		throw std::logic_error("next_prime: should not reach this point!");
	}

	// extend the table until its last entry is larger than p
	unsigned int candidate = primes.back();
	do {
		bool is_prime;
		do {
			candidate += 2;
			is_prime = true;
			// trial division by the cached primes up to sqrt(candidate);
			// the table always reaches that far because it holds all odd
			// primes below candidate
			for (const auto & d : primes) {
				if ( static_cast<unsigned long long>(d) * d > candidate ) {
					break;
				}
				if ( candidate % d == 0 ) {
					is_prime = false;
					break;
				}
			}
		} while ( !is_prime );
		primes.push_back(candidate);
	} while ( candidate <= p );
	return candidate;
}
1340
/** Manages the splitting of a vector of modular factors into two partitions.
 *
 *  Every factor carries a flag in k: 0 puts it into the left partition, 1 into
 *  the right one. The products of the factors of both partitions are kept in
 *  lr[0] (left) and lr[1] (right) and are recomputed by split() whenever the
 *  flags change. next() steps to another distribution of the flags; the right
 *  partition never holds more than n/2 factors.
 */
class factor_partition
{
public:
	/** Takes the vector of modular factors and initializes the first partition.
	    The first partition has factor 0 on the right and all others on the
	    left. The vector must not be empty (its first element is accessed). */
	factor_partition(const upvec& factors_) : factors(factors_)
	{
		n = factors.size();
		k.resize(n, 0);
		k[0] = 1;
		cache.resize(n-1);
		one.resize(1, factors.front()[0].ring()->one());
		len = 1;
		last = 0;
		split();
	}
	/** Flag of factor i: 0 = left partition, 1 = right partition */
	int operator[](size_t i) const { return k[i]; }
	/** Total number of factors */
	size_t size() const { return n; }
	/** Number of factors in the left partition */
	size_t size_left() const { return n-len; }
	/** Number of factors in the right partition */
	size_t size_right() const { return len; }
	/** Initializes the next partition.
	    Returns true, if there is one, false otherwise. */
	bool next()
	{
		if ( last == n-1 ) {
			// the highest 1 sits at the very end and cannot move any further.
			// Walk down over the run of ones at the end; rem counts the ones
			// that have not been passed yet.
			int rem = len - 1;
			int p = last - 1;
			while ( rem ) {
				if ( k[p] ) {
					--rem;
					--p;
					continue;
				}
				// k[p] is 0 and there are still ones below p: find the
				// nearest of them
				last = p - 1;
				while ( k[last] == 0 ) { --last; }
				// NOTE(review): with exactly half of the factors on the right
				// the enumeration ends as soon as entry 0 would have to move,
				// presumably because the remaining partitions are mirror
				// images of ones already visited -- confirm
				if ( last == 0 && n == 2*len ) return false;
				// move that 1 one position up and put the ones of the run at
				// the end directly behind it (len-rem+1 ones altogether)
				k[last++] = 0;
				for ( size_t i=0; i<=len-rem; ++i ) {
					k[last] = 1;
					++last;
				}
				fill(k.begin()+last, k.end(), 0);
				// let last point to the highest 1 again
				--last;
				split();
				return true;
			}
			// all ones form a single run at the end: start over with one more
			// factor on the right, all of them at the front
			last = len;
			++len;
			if ( len > n/2 ) return false;
			fill(k.begin(), k.begin()+len, 1);
			// entry len itself is not written here
			// NOTE(review): this relies on k[len] being 0 already -- confirm
			fill(k.begin()+len+1, k.end(), 0);
		} else {
			// simple case: move the highest 1 one position up
			k[last++] = 0;
			k[last] = 1;
		}
		split();
		return true;
	}
	/** Get first partition */
	umodpoly& left() { return lr[0]; }
	/** Get second partition */
	umodpoly& right() { return lr[1]; }
private:
	/** Multiplies the factors into lr[0] and lr[1] (both must have been reset
	    to one before), handling runs of neighbouring factors with the same
	    flag at once. For a run starting at pos, cache[pos][d-1] holds the
	    product factors[pos] * ... * factors[pos+d]; missing entries are
	    computed and stored when they are needed first. */
	void split_cached()
	{
		size_t i = 0;
		do {
			size_t pos = i;
			int group = k[i++];
			// d = number of factors following pos that carry the same flag
			size_t d = 0;
			while ( i < n && k[i] == group ) { ++d; ++i; }
			if ( d ) {
				if ( cache[pos].size() >= d ) {
					// product of the whole run is already known
					lr[group] = lr[group] * cache[pos][d-1];
				} else {
					if ( cache[pos].size() == 0 ) {
						cache[pos].push_back(factors[pos] * factors[pos+1]);
					}
					// extend the cached products factor by factor up to the
					// end of the run; j is the next factor to multiply in
					size_t j = pos + cache[pos].size() + 1;
					d -= cache[pos].size();
					while ( d ) {
						umodpoly buf = cache[pos].back() * factors[j];
						cache[pos].push_back(buf);
						--d;
						++j;
					}
					lr[group] = lr[group] * cache[pos].back();
				}
			} else {
				// run of length one: nothing to cache
				lr[group] = lr[group] * factors[pos];
			}
		} while ( i < n );
	}
	/** Recomputes lr[0] and lr[1] from the flags in k. The cached variant is
	    used for more than 6 factors only. */
	void split()
	{
		lr[0] = one;
		lr[1] = one;
		if ( n > 6 ) {
			split_cached();
		} else {
			for ( size_t i=0; i<n; ++i ) {
				lr[k[i]] = lr[k[i]] * factors[i];
			}
		}
	}
private:
	umodpoly lr[2];                  // products of the left [0] and right [1] partition
	vector<vector<umodpoly>> cache;  // products of runs of factors, see split_cached()
	upvec factors;                   // copy of the factors to distribute
	umodpoly one;                    // constant polynomial one in the ring of the factors
	size_t n;                        // number of factors
	size_t len;                      // number of flags set to 1 (size of right partition)
	size_t last;                     // index of the highest flag set to 1
	vector<int> k;                   // flags: 0 = left partition, 1 = right partition
};
1457
/** Contains a pair of univariate polynomial and its modular factors.
 *  Used by factor_univariate(), which keeps a stack of these pairs for the
 *  polynomials that still have to be split.
 */
struct ModFactors
{
	upoly poly;      // polynomial with integer coefficients
	upvec factors;   // modular factors belonging to poly
};
1466
1467 /** Univariate polynomial factorization.
1468  *
1469  *  Modular factorization is tried for several primes to minimize the number of
1470  *  modular factors. Then, Hensel lifting is performed.
1471  *
1472  *  @param[in]     poly   expanded square free univariate polynomial
1473  *  @param[in]     x      symbol
1474  *  @param[in,out] prime  prime number to start trying modular factorization with,
1475  *                        output value is the prime number actually used
1476  */
1477 static ex factor_univariate(const ex& poly, const ex& x, unsigned int& prime)
1478 {
1479         ex unit, cont, prim_ex;
1480         poly.unitcontprim(x, unit, cont, prim_ex);
1481         upoly prim;
1482         upoly_from_ex(prim, prim_ex, x);
1483         if (prim_ex.is_equal(1)) {
1484                 return poly;
1485         }
1486
1487         // determine proper prime and minimize number of modular factors
1488         prime = 3;
1489         unsigned int lastp = prime;
1490         cl_modint_ring R;
1491         unsigned int trials = 0;
1492         unsigned int minfactors = 0;
1493
1494         const numeric& cont_n = ex_to<numeric>(cont);
1495         cl_I i_cont;
1496         if (cont_n.is_integer()) {
1497                 i_cont = the<cl_I>(cont_n.to_cl_N());
1498         } else {
1499                 // poly \in Q[x] => poly = q ipoly, ipoly \in Z[x], q \in Q
1500                 // factor(poly) \equiv q factor(ipoly)
1501                 i_cont = cl_I(1);
1502         }
1503         cl_I lc = lcoeff(prim)*i_cont;
1504         upvec factors;
1505         while ( trials < 2 ) {
1506                 umodpoly modpoly;
1507                 while ( true ) {
1508                         prime = next_prime(prime);
1509                         if ( !zerop(rem(lc, prime)) ) {
1510                                 R = find_modint_ring(prime);
1511                                 umodpoly_from_upoly(modpoly, prim, R);
1512                                 if ( squarefree(modpoly) ) break;
1513                         }
1514                 }
1515
1516                 // do modular factorization
1517                 upvec trialfactors;
1518                 factor_modular(modpoly, trialfactors);
1519                 if ( trialfactors.size() <= 1 ) {
1520                         // irreducible for sure
1521                         return poly;
1522                 }
1523
1524                 if ( minfactors == 0 || trialfactors.size() < minfactors ) {
1525                         factors = trialfactors;
1526                         minfactors = trialfactors.size();
1527                         lastp = prime;
1528                         trials = 1;
1529                 } else {
1530                         ++trials;
1531                 }
1532         }
1533         prime = lastp;
1534         R = find_modint_ring(prime);
1535
1536         // lift all factor combinations
1537         stack<ModFactors> tocheck;
1538         ModFactors mf;
1539         mf.poly = prim;
1540         mf.factors = factors;
1541         tocheck.push(mf);
1542         upoly f1, f2;
1543         ex result = 1;
1544         while ( tocheck.size() ) {
1545                 const size_t n = tocheck.top().factors.size();
1546                 factor_partition part(tocheck.top().factors);
1547                 while ( true ) {
1548                         // call Hensel lifting
1549                         hensel_univar(tocheck.top().poly, prime, part.left(), part.right(), f1, f2);
1550                         if ( !f1.empty() ) {
1551                                 // successful, update the stack and the result
1552                                 if ( part.size_left() == 1 ) {
1553                                         if ( part.size_right() == 1 ) {
1554                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1555                                                 tocheck.pop();
1556                                                 break;
1557                                         }
1558                                         result *= upoly_to_ex(f1, x);
1559                                         tocheck.top().poly = f2;
1560                                         for ( size_t i=0; i<n; ++i ) {
1561                                                 if ( part[i] == 0 ) {
1562                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1563                                                         break;
1564                                                 }
1565                                         }
1566                                         break;
1567                                 }
1568                                 else if ( part.size_right() == 1 ) {
1569                                         if ( part.size_left() == 1 ) {
1570                                                 result *= upoly_to_ex(f1, x) * upoly_to_ex(f2, x);
1571                                                 tocheck.pop();
1572                                                 break;
1573                                         }
1574                                         result *= upoly_to_ex(f2, x);
1575                                         tocheck.top().poly = f1;
1576                                         for ( size_t i=0; i<n; ++i ) {
1577                                                 if ( part[i] == 1 ) {
1578                                                         tocheck.top().factors.erase(tocheck.top().factors.begin()+i);
1579                                                         break;
1580                                                 }
1581                                         }
1582                                         break;
1583                                 } else {
1584                                         upvec newfactors1(part.size_left()), newfactors2(part.size_right());
1585                                         auto i1 = newfactors1.begin(), i2 = newfactors2.begin();
1586                                         for ( size_t i=0; i<n; ++i ) {
1587                                                 if ( part[i] ) {
1588                                                         *i2++ = tocheck.top().factors[i];
1589                                                 } else {
1590                                                         *i1++ = tocheck.top().factors[i];
1591                                                 }
1592                                         }
1593                                         tocheck.top().factors = newfactors1;
1594                                         tocheck.top().poly = f1;
1595                                         ModFactors mf;
1596                                         mf.factors = newfactors2;
1597                                         mf.poly = f2;
1598                                         tocheck.push(mf);
1599                                         break;
1600                                 }
1601                         } else {
1602                                 // not successful
1603                                 if ( !part.next() ) {
1604                                         // if no more combinations left, return polynomial as
1605                                         // irreducible
1606                                         result *= upoly_to_ex(tocheck.top().poly, x);
1607                                         tocheck.pop();
1608                                         break;
1609                                 }
1610                         }
1611                 }
1612         }
1613
1614         return unit * cont * result;
1615 }
1616
1617 /** Second interface to factor_univariate() to be used if the information about
1618  *  the prime is not needed.
1619  */
1620 static inline ex factor_univariate(const ex& poly, const ex& x)
1621 {
1622         unsigned int prime;
1623         return factor_univariate(poly, x, prime);
1624 }
1625
/** Represents an evaluation point (<symbol>==<integer>).
 */
struct EvalPoint
{
	ex x;            // the symbol
	int evalpoint;   // the integer value paired with the symbol
};
1633
#ifdef DEBUGFACTOR
/** Debugging aid: prints every evaluation point as "(<symbol>==<integer>) ". */
ostream& operator<<(ostream& o, const vector<EvalPoint>& v)
{
	for (const auto & point : v) {
		o << "(" << point.x << "==" << point.evalpoint << ") ";
	}
	return o;
}
#endif // def DEBUGFACTOR
1643
// forward declaration: multivar_diophant() is called by multiterm_eea_lift()
// before its definition appears in this file
static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I, unsigned int d, unsigned int p, unsigned int k);
1646
1647 /** Utility function for multivariate Hensel lifting.
1648  *
1649  *  Solves the equation
1650  *    s_1*b_1 + ... + s_r*b_r == 1 mod p^k
1651  *  with deg(s_i) < deg(a_i)
1652  *  and with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1653  *
1654  *  The implementation follows algorithm 6.3 of [GCL].
1655  *
1656  *  @param[in]  a   vector of modular univariate polynomials
1657  *  @param[in]  x   symbol
1658  *  @param[in]  p   prime number
1659  *  @param[in]  k   p^k is modulus
1660  *  @return         vector of polynomials (s_i)
1661  */
1662 static upvec multiterm_eea_lift(const upvec& a, const ex& x, unsigned int p, unsigned int k)
1663 {
1664         const size_t r = a.size();
1665         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1666         upvec q(r-1);
1667         q[r-2] = a[r-1];
1668         for ( size_t j=r-2; j>=1; --j ) {
1669                 q[j-1] = a[j] * q[j];
1670         }
1671         umodpoly beta(1, R->one());
1672         upvec s;
1673         for ( size_t j=1; j<r; ++j ) {
1674                 vector<ex> mdarg(2);
1675                 mdarg[0] = umodpoly_to_ex(q[j-1], x);
1676                 mdarg[1] = umodpoly_to_ex(a[j-1], x);
1677                 vector<EvalPoint> empty;
1678                 vector<ex> exsigma = multivar_diophant(mdarg, x, umodpoly_to_ex(beta, x), empty, 0, p, k);
1679                 umodpoly sigma1;
1680                 umodpoly_from_ex(sigma1, exsigma[0], x, R);
1681                 umodpoly sigma2;
1682                 umodpoly_from_ex(sigma2, exsigma[1], x, R);
1683                 beta = sigma1;
1684                 s.push_back(sigma2);
1685         }
1686         s.push_back(beta);
1687         return s;
1688 }
1689
1690 /** Changes the modulus of a modular polynomial. Used by eea_lift().
1691  *
1692  *  @param[in]     R  new modular ring
1693  *  @param[in,out] a  polynomial to change (in situ)
1694  */
1695 static void change_modulus(const cl_modint_ring& R, umodpoly& a)
1696 {
1697         if ( a.empty() ) return;
1698         cl_modint_ring oldR = a[0].ring();
1699         for (auto & i : a) {
1700                 i = R->canonhom(oldR->retract(i));
1701         }
1702         canonicalize(a);
1703 }
1704
1705 /** Utility function for multivariate Hensel lifting.
1706  *
1707  *  Solves  s*a + t*b == 1 mod p^k  given a,b.
1708  *
1709  *  The implementation follows algorithm 6.3 of [GCL].
1710  *
1711  *  @param[in]  a   polynomial
1712  *  @param[in]  b   polynomial
1713  *  @param[in]  x   symbol
1714  *  @param[in]  p   prime number
1715  *  @param[in]  k   p^k is modulus
1716  *  @param[out] s_  output polynomial
1717  *  @param[out] t_  output polynomial
1718  */
1719 static void eea_lift(const umodpoly& a, const umodpoly& b, const ex& x, unsigned int p, unsigned int k, umodpoly& s_, umodpoly& t_)
1720 {
1721         cl_modint_ring R = find_modint_ring(p);
1722         umodpoly amod = a;
1723         change_modulus(R, amod);
1724         umodpoly bmod = b;
1725         change_modulus(R, bmod);
1726
1727         umodpoly smod;
1728         umodpoly tmod;
1729         exteuclid(amod, bmod, smod, tmod);
1730
1731         cl_modint_ring Rpk = find_modint_ring(expt_pos(cl_I(p),k));
1732         umodpoly s = smod;
1733         change_modulus(Rpk, s);
1734         umodpoly t = tmod;
1735         change_modulus(Rpk, t);
1736
1737         cl_I modulus(p);
1738         umodpoly one(1, Rpk->one());
1739         for ( size_t j=1; j<k; ++j ) {
1740                 umodpoly e = one - a * s - b * t;
1741                 reduce_coeff(e, modulus);
1742                 umodpoly c = e;
1743                 change_modulus(R, c);
1744                 umodpoly sigmabar = smod * c;
1745                 umodpoly taubar = tmod * c;
1746                 umodpoly sigma, q;
1747                 remdiv(sigmabar, bmod, sigma, q);
1748                 umodpoly tau = taubar + q * amod;
1749                 umodpoly sadd = sigma;
1750                 change_modulus(Rpk, sadd);
1751                 cl_MI modmodulus(Rpk, modulus);
1752                 s = s + sadd * modmodulus;
1753                 umodpoly tadd = tau;
1754                 change_modulus(Rpk, tadd);
1755                 t = t + tadd * modmodulus;
1756                 modulus = modulus * p;
1757         }
1758
1759         s_ = s; t_ = t;
1760 }
1761
1762 /** Utility function for multivariate Hensel lifting.
1763  *
1764  *  Solves the equation
1765  *    s_1*b_1 + ... + s_r*b_r == x^m mod p^k
1766  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1767  *
1768  *  The implementation follows algorithm 6.3 of [GCL].
1769  *
1770  *  @param a  vector with univariate polynomials mod p^k
1771  *  @param x  symbol
1772  *  @param m  exponent of x^m in the equation to solve
1773  *  @param p  prime number
1774  *  @param k  p^k is modulus
1775  *  @return   vector of polynomials (s_i)
1776  */
1777 static upvec univar_diophant(const upvec& a, const ex& x, unsigned int m, unsigned int p, unsigned int k)
1778 {
1779         cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),k));
1780
1781         const size_t r = a.size();
1782         upvec result;
1783         if ( r > 2 ) {
1784                 upvec s = multiterm_eea_lift(a, x, p, k);
1785                 for ( size_t j=0; j<r; ++j ) {
1786                         umodpoly bmod = umodpoly_to_umodpoly(s[j], R, m);
1787                         umodpoly buf;
1788                         rem(bmod, a[j], buf);
1789                         result.push_back(buf);
1790                 }
1791         } else {
1792                 umodpoly s, t;
1793                 eea_lift(a[1], a[0], x, p, k, s, t);
1794                 umodpoly bmod = umodpoly_to_umodpoly(s, R, m);
1795                 umodpoly buf, q;
1796                 remdiv(bmod, a[0], buf, q);
1797                 result.push_back(buf);
1798                 umodpoly t1mod = umodpoly_to_umodpoly(t, R, m);
1799                 buf = t1mod + q * a[1];
1800                 result.push_back(buf);
1801         }
1802
1803         return result;
1804 }
1805
1806 /** Map used by function make_modular().
1807  *  Finds every coefficient in a polynomial and replaces it by is value in the
1808  *  given modular ring R (symmetric representation).
1809  */
1810 struct make_modular_map : public map_function {
1811         cl_modint_ring R;
1812         make_modular_map(const cl_modint_ring& R_) : R(R_) { }
1813         ex operator()(const ex& e) override
1814         {
1815                 if ( is_a<add>(e) || is_a<mul>(e) ) {
1816                         return e.map(*this);
1817                 }
1818                 else if ( is_a<numeric>(e) ) {
1819                         numeric mod(R->modulus);
1820                         numeric halfmod = (mod-1)/2;
1821                         cl_MI emod = R->canonhom(the<cl_I>(ex_to<numeric>(e).to_cl_N()));
1822                         numeric n(R->retract(emod));
1823                         if ( n > halfmod ) {
1824                                 return n-mod;
1825                         } else {
1826                                 return n;
1827                         }
1828                 }
1829                 return e;
1830         }
1831 };
1832
1833 /** Helps mimicking modular multivariate polynomial arithmetic.
1834  *
1835  *  @param e  expression of which to make the coefficients equal to their value
1836  *            in the modular ring R (symmetric representation)
1837  *  @param R  modular ring
1838  *  @return   resulting expression
1839  */
1840 static ex make_modular(const ex& e, const cl_modint_ring& R)
1841 {
1842         make_modular_map map(R);
1843         return map(e.expand());
1844 }
1845
1846 /** Utility function for multivariate Hensel lifting.
1847  *
1848  *  Returns the polynomials s_i that fulfill
1849  *    s_1*b_1 + ... + s_r*b_r == c mod <I^(d+1),p^k>
1850  *  with given b_1 = a_1 * ... * a_{i-1} * a_{i+1} * ... * a_r
1851  *
1852  *  The implementation follows algorithm 6.2 of [GCL].
1853  *
1854  *  @param a_  vector of multivariate factors mod p^k
1855  *  @param x   symbol (equiv. x_1 in [GCL])
1856  *  @param c   polynomial mod p^k
1857  *  @param I   vector of evaluation points
1858  *  @param d   maximum total degree of result
1859  *  @param p   prime number
1860  *  @param k   p^k is modulus
1861  *  @return    vector of polynomials (s_i)
1862  */
1863 static vector<ex> multivar_diophant(const vector<ex>& a_, const ex& x, const ex& c, const vector<EvalPoint>& I,
1864                                     unsigned int d, unsigned int p, unsigned int k)
1865 {
1866         vector<ex> a = a_;
1867
1868         const cl_I modulus = expt_pos(cl_I(p),k);
1869         const cl_modint_ring R = find_modint_ring(modulus);
1870         const size_t r = a.size();
1871         const size_t nu = I.size() + 1;
1872
1873         vector<ex> sigma;
1874         if ( nu > 1 ) {
1875                 ex xnu = I.back().x;
1876                 int alphanu = I.back().evalpoint;
1877
1878                 ex A = 1;
1879                 for ( size_t i=0; i<r; ++i ) {
1880                         A *= a[i];
1881                 }
1882                 vector<ex> b(r);
1883                 for ( size_t i=0; i<r; ++i ) {
1884                         b[i] = normal(A / a[i]);
1885                 }
1886
1887                 vector<ex> anew = a;
1888                 for ( size_t i=0; i<r; ++i ) {
1889                         anew[i] = anew[i].subs(xnu == alphanu);
1890                 }
1891                 ex cnew = c.subs(xnu == alphanu);
1892                 vector<EvalPoint> Inew = I;
1893                 Inew.pop_back();
1894                 sigma = multivar_diophant(anew, x, cnew, Inew, d, p, k);
1895
1896                 ex buf = c;
1897                 for ( size_t i=0; i<r; ++i ) {
1898                         buf -= sigma[i] * b[i];
1899                 }
1900                 ex e = make_modular(buf, R);
1901
1902                 ex monomial = 1;
1903                 for ( size_t m=1; !e.is_zero() && e.has(xnu) && m<=d; ++m ) {
1904                         monomial *= (xnu - alphanu);
1905                         monomial = expand(monomial);
1906                         ex cm = e.diff(ex_to<symbol>(xnu), m).subs(xnu==alphanu) / factorial(m);
1907                         cm = make_modular(cm, R);
1908                         if ( !cm.is_zero() ) {
1909                                 vector<ex> delta_s = multivar_diophant(anew, x, cm, Inew, d, p, k);
1910                                 ex buf = e;
1911                                 for ( size_t j=0; j<delta_s.size(); ++j ) {
1912                                         delta_s[j] *= monomial;
1913                                         sigma[j] += delta_s[j];
1914                                         buf -= delta_s[j] * b[j];
1915                                 }
1916                                 e = make_modular(buf, R);
1917                         }
1918                 }
1919         } else {
1920                 upvec amod;
1921                 for ( size_t i=0; i<a.size(); ++i ) {
1922                         umodpoly up;
1923                         umodpoly_from_ex(up, a[i], x, R);
1924                         amod.push_back(up);
1925                 }
1926
1927                 sigma.insert(sigma.begin(), r, 0);
1928                 size_t nterms;
1929                 ex z;
1930                 if ( is_a<add>(c) ) {
1931                         nterms = c.nops();
1932                         z = c.op(0);
1933                 } else {
1934                         nterms = 1;
1935                         z = c;
1936                 }
1937                 for ( size_t i=0; i<nterms; ++i ) {
1938                         int m = z.degree(x);
1939                         cl_I cm = the<cl_I>(ex_to<numeric>(z.lcoeff(x)).to_cl_N());
1940                         upvec delta_s = univar_diophant(amod, x, m, p, k);
1941                         cl_MI modcm;
1942                         cl_I poscm = plusp(cm) ? cm : mod(cm, modulus);
1943                         modcm = cl_MI(R, poscm);
1944                         for ( size_t j=0; j<delta_s.size(); ++j ) {
1945                                 delta_s[j] = delta_s[j] * modcm;
1946                                 sigma[j] = sigma[j] + umodpoly_to_ex(delta_s[j], x);
1947                         }
1948                         if ( nterms > 1 && i+1 != nterms ) {
1949                                 z = c.op(i+1);
1950                         }
1951                 }
1952         }
1953
1954         for ( size_t i=0; i<sigma.size(); ++i ) {
1955                 sigma[i] = make_modular(sigma[i], R);
1956         }
1957
1958         return sigma;
1959 }
1960
1961 /** Multivariate Hensel lifting.
1962  *  The implementation follows algorithm 6.4 of [GCL].
1963  *  Since we don't have a data type for modular multivariate polynomials, the
1964  *  respective operations are done in a GiNaC::ex and the function
1965  *  make_modular() is then called to make the coefficient modular p^l.
1966  *
1967  *  @param a    multivariate polynomial primitive in x
1968  *  @param x    symbol (equiv. x_1 in [GCL])
1969  *  @param I    vector of evaluation points (x_2==a_2,x_3==a_3,...)
1970  *  @param p    prime number (should not divide lcoeff(a mod I))
1971  *  @param l    p^l is the modulus of the lifted univariate field
1972  *  @param u    vector of modular (mod p^l) factors of a mod I
1973  *  @param lcU  correct leading coefficient of the univariate factors of a mod I
1974  *  @return     list GiNaC::lst with lifted factors (multivariate factors of a),
1975  *              empty if Hensel lifting did not succeed
1976  */
1977 static ex hensel_multivar(const ex& a, const ex& x, const vector<EvalPoint>& I,
1978                           unsigned int p, const cl_I& l, const upvec& u, const vector<ex>& lcU)
1979 {
1980         const size_t nu = I.size() + 1;
1981         const cl_modint_ring R = find_modint_ring(expt_pos(cl_I(p),l));
1982
1983         vector<ex> A(nu);
1984         A[nu-1] = a;
1985
1986         for ( size_t j=nu; j>=2; --j ) {
1987                 ex x = I[j-2].x;
1988                 int alpha = I[j-2].evalpoint;
1989                 A[j-2] = A[j-1].subs(x==alpha);
1990                 A[j-2] = make_modular(A[j-2], R);
1991         }
1992
1993         int maxdeg = a.degree(I.front().x);
1994         for ( size_t i=1; i<I.size(); ++i ) {
1995                 int maxdeg2 = a.degree(I[i].x);
1996                 if ( maxdeg2 > maxdeg ) maxdeg = maxdeg2;
1997         }
1998
1999         const size_t n = u.size();
2000         vector<ex> U(n);
2001         for ( size_t i=0; i<n; ++i ) {
2002                 U[i] = umodpoly_to_ex(u[i], x);
2003         }
2004
2005         for ( size_t j=2; j<=nu; ++j ) {
2006                 vector<ex> U1 = U;
2007                 ex monomial = 1;
2008                 for ( size_t m=0; m<n; ++m) {
2009                         if ( lcU[m] != 1 ) {
2010                                 ex coef = lcU[m];
2011                                 for ( size_t i=j-1; i<nu-1; ++i ) {
2012                                         coef = coef.subs(I[i].x == I[i].evalpoint);
2013                                 }
2014                                 coef = make_modular(coef, R);
2015                                 int deg = U[m].degree(x);
2016                                 U[m] = U[m] - U[m].lcoeff(x) * pow(x,deg) + coef * pow(x,deg);
2017                         }
2018                 }
2019                 ex Uprod = 1;
2020                 for ( size_t i=0; i<n; ++i ) {
2021                         Uprod *= U[i];
2022                 }
2023                 ex e = expand(A[j-1] - Uprod);
2024
2025                 vector<EvalPoint> newI;
2026                 for ( size_t i=1; i<=j-2; ++i ) {
2027                         newI.push_back(I[i-1]);
2028                 }
2029
2030                 ex xj = I[j-2].x;
2031                 int alphaj = I[j-2].evalpoint;
2032                 size_t deg = A[j-1].degree(xj);
2033                 for ( size_t k=1; k<=deg; ++k ) {
2034                         if ( !e.is_zero() ) {
2035                                 monomial *= (xj - alphaj);
2036                                 monomial = expand(monomial);
2037                                 ex dif = e.diff(ex_to<symbol>(xj), k);
2038                                 ex c = dif.subs(xj==alphaj) / factorial(k);
2039                                 if ( !c.is_zero() ) {
2040                                         vector<ex> deltaU = multivar_diophant(U1, x, c, newI, maxdeg, p, cl_I_to_uint(l));
2041                                         for ( size_t i=0; i<n; ++i ) {
2042                                                 deltaU[i] *= monomial;
2043                                                 U[i] += deltaU[i];
2044                                                 U[i] = make_modular(U[i], R);
2045                                         }
2046                                         ex Uprod = 1;
2047                                         for ( size_t i=0; i<n; ++i ) {
2048                                                 Uprod *= U[i];
2049                                         }
2050                                         e = A[j-1] - Uprod;
2051                                         e = make_modular(e, R);
2052                                 }
2053                         }
2054                 }
2055         }
2056
2057         ex acand = 1;
2058         for ( size_t i=0; i<U.size(); ++i ) {
2059                 acand *= U[i];
2060         }
2061         if ( expand(a-acand).is_zero() ) {
2062                 return lst(U.begin(), U.end());
2063         } else {
2064                 return lst{};
2065         }
2066 }
2067
2068 /** Takes a factorized expression and puts the factors in a vector. The exponents
2069  *  of the factors are discarded, e.g. 7*x^2*(y+1)^4 --> {7,x,y+1}. The first
2070  *  element of the result is always the numeric coefficient.
2071  */
2072 static exvector put_factors_into_vec(const ex& e)
2073 {
2074         exvector result;
2075         if ( is_a<numeric>(e) ) {
2076                 result.push_back(e);
2077                 return result;
2078         }
2079         if ( is_a<power>(e) ) {
2080                 result.push_back(1);
2081                 result.push_back(e.op(0));
2082                 return result;
2083         }
2084         if ( is_a<symbol>(e) || is_a<add>(e) ) {
2085                 ex icont(e.integer_content());
2086                 result.push_back(icont);
2087                 result.push_back(e/icont);
2088                 return result;
2089         }
2090         if ( is_a<mul>(e) ) {
2091                 ex nfac = 1;
2092                 result.push_back(nfac);
2093                 for ( size_t i=0; i<e.nops(); ++i ) {
2094                         ex op = e.op(i);
2095                         if ( is_a<numeric>(op) ) {
2096                                 nfac = op;
2097                         }
2098                         if ( is_a<power>(op) ) {
2099                                 result.push_back(op.op(0));
2100                         }
2101                         if ( is_a<symbol>(op) || is_a<add>(op) ) {
2102                                 result.push_back(op);
2103                         }
2104                 }
2105                 result[0] = nfac;
2106                 return result;
2107         }
2108         throw runtime_error("put_factors_into_vec: bad term.");
2109 }
2110
2111 /** Checks a set of numbers for whether each number has a unique prime factor.
2112  *
2113  *  @param[in]  f  numbers to check
2114  *  @return        true: if number set is bad, false: if set is okay (has unique
2115  *                 prime factors)
2116  */
2117 static bool checkdivisors(const exvector& f)
2118 {
2119         const int k = f.size();
2120         numeric q, r;
2121         vector<numeric> d(k);
2122         d[0] = ex_to<numeric>(abs(f[0]));
2123         for ( int i=1; i<k; ++i ) {
2124                 q = ex_to<numeric>(abs(f[i]));
2125                 for ( int j=i-1; j>=0; --j ) {
2126                         r = d[j];
2127                         do {
2128                                 r = gcd(r, q);
2129                                 q = q/r;
2130                         } while ( r != 1 );
2131                         if ( q == 1 ) {
2132                                 return true;
2133                         }
2134                 }
2135                 d[i] = q;
2136         }
2137         return false;
2138 }
2139
2140 /** Generates a set of evaluation points for a multivariate polynomial.
2141  *  The set fulfills the following conditions:
2142  *  1. lcoeff(evaluated_polynomial) does not vanish
2143  *  2. factors of lcoeff(evaluated_polynomial) have each a unique prime factor
2144  *  3. evaluated_polynomial is square free
2145  *  See [Wan] for more details.
2146  *
2147  *  @param[in]     u        multivariate polynomial to be factored
2148  *  @param[in]     vn       leading coefficient of u in x (x==first symbol in syms)
2149  *  @param[in]     x        first symbol that appears in u
2150  *  @param[in]     syms_wox remaining symbols that appear in u
2151  *  @param[in]     f        vector containing the factors of the leading coefficient vn
2152  *  @param[in,out] modulus  integer modulus for random number generation (i.e. |a_i| < modulus)
2153  *  @param[out]    u0       returns the evaluated (univariate) polynomial
2154  *  @param[out]    a        returns the valid evaluation points. must have initial size equal
2155  *                          number of symbols-1 before calling generate_set
2156  */
2157 static void generate_set(const ex& u, const ex& vn, const ex& x, const exset& syms_wox, const exvector& f,
2158                          numeric& modulus, ex& u0, vector<numeric>& a)
2159 {
2160         while ( true ) {
2161                 ++modulus;
2162                 // generate a set of integers ...
2163                 u0 = u;
2164                 ex vna = vn;
2165                 ex vnatry;
2166                 auto s = syms_wox.begin();
2167                 for ( size_t i=0; i<a.size(); ++i ) {
2168                         do {
2169                                 a[i] = mod(numeric(rand()), 2*modulus) - modulus;
2170                                 vnatry = vna.subs(*s == a[i]);
2171                                 // ... for which the leading coefficient doesn't vanish ...
2172                         } while ( vnatry == 0 );
2173                         vna = vnatry;
2174                         u0 = u0.subs(*s == a[i]);
2175                         ++s;
2176                 }
2177                 // ... for which u0 is square free ...
2178                 ex g = gcd(u0, u0.diff(ex_to<symbol>(x)));
2179                 if ( !is_a<numeric>(g) ) {
2180                         continue;
2181                 }
2182                 if ( !is_a<numeric>(vn) ) {
2183                         // ... and for which the evaluated factors have each an unique prime factor
2184                         exvector fnum = f;
2185                         fnum[0] = fnum[0] * u0.content(x);
2186                         for ( size_t i=1; i<fnum.size(); ++i ) {
2187                                 if ( !is_a<numeric>(fnum[i]) ) {
2188                                         s = syms_wox.begin();
2189                                         for ( size_t j=0; j<a.size(); ++j, ++s ) {
2190                                                 fnum[i] = fnum[i].subs(*s == a[j]);
2191                                         }
2192                                 }
2193                         }
2194                         if ( checkdivisors(fnum) ) {
2195                                 continue;
2196                         }
2197                 }
2198                 // ok, we have a valid set now
2199                 return;
2200         }
2201 }
2202
2203 // forward declaration
2204 static ex factor_sqrfree(const ex& poly);
2205
2206 /** Used by factor_multivariate().
2207  */
2208 struct factorization_ctx {
2209         const ex poly, x;         // polynomial, first symbol x...
2210         const exset syms_wox;     // ...remaining symbols w/o x
2211         ex unit, cont, pp;        // unit * cont * pp == poly
2212         ex vn; exvector vnlst;    // leading coeff, factors of leading coeff
2213         numeric modulus;          // incremented each time we try
2214         /** returns factors or empty if it did not succeed */
2215         ex try_next_evaluation_homomorphism()
2216         {
2217                 constexpr unsigned maxtrials = 3;
2218                 vector<numeric> a(syms_wox.size(), 0);
2219
2220                 unsigned int trialcount = 0;
2221                 unsigned int prime;
2222                 int factor_count = 0;
2223                 int min_factor_count = -1;
2224                 ex u, delta;
2225                 ex ufac;
2226                 exvector ufaclst;
2227
2228                 // try several evaluation points to reduce the number of factors
2229                 while ( trialcount < maxtrials ) {
2230
2231                         // generate a set of valid evaluation points
2232                         generate_set(pp, vn, x, syms_wox, vnlst, modulus, u, a);
2233
2234                         ufac = factor_univariate(u, x, prime);
2235                         ufaclst = put_factors_into_vec(ufac);
2236                         factor_count = ufaclst.size()-1;
2237                         delta = ufaclst[0];
2238
2239                         if ( factor_count <= 1 ) {
2240                                 // irreducible
2241                                 return lst{pp};
2242                         }
2243                         if ( min_factor_count < 0 ) {
2244                                 // first time here
2245                                 min_factor_count = factor_count;
2246                         }
2247                         else if ( min_factor_count == factor_count ) {
2248                                 // one less to try
2249                                 ++trialcount;
2250                         }
2251                         else if ( min_factor_count > factor_count ) {
2252                                 // new minimum, reset trial counter
2253                                 min_factor_count = factor_count;
2254                                 trialcount = 0;
2255                         }
2256                 }
2257
2258                 // determine true leading coefficients for the Hensel lifting
2259                 vector<ex> C(factor_count);
2260                 if ( is_a<numeric>(vn) ) {
2261                         // easy case
2262                         for ( size_t i=1; i<ufaclst.size(); ++i ) {
2263                                 C[i-1] = ufaclst[i].lcoeff(x);
2264                         }
2265                 } else {
2266                         // difficult case.
2267                         // we use the property of the ftilde having a unique prime factor.
2268                         // details can be found in [Wan].
2269                         // calculate ftilde
2270                         vector<numeric> ftilde(vnlst.size()-1);
2271                         for ( size_t i=0; i<ftilde.size(); ++i ) {
2272                                 ex ft = vnlst[i+1];
2273                                 auto s = syms_wox.begin();
2274                                 for ( size_t j=0; j<a.size(); ++j ) {
2275                                         ft = ft.subs(*s == a[j]);
2276                                         ++s;
2277                                 }
2278                                 ftilde[i] = ex_to<numeric>(ft);
2279                         }
2280                         // calculate D and C
2281                         vector<bool> used_flag(ftilde.size(), false);
2282                         vector<ex> D(factor_count, 1);
2283                         if ( delta == 1 ) {
2284                                 for ( int i=0; i<factor_count; ++i ) {
2285                                         numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
2286                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2287                                                 int count = 0;
2288                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2289                                                         prefac = iquo(prefac, ftilde[j]);
2290                                                         ++count;
2291                                                 }
2292                                                 if ( count ) {
2293                                                         used_flag[j] = true;
2294                                                         D[i] = D[i] * pow(vnlst[j+1], count);
2295                                                 }
2296                                         }
2297                                         C[i] = D[i] * prefac;
2298                                 }
2299                         } else {
2300                                 for ( int i=0; i<factor_count; ++i ) {
2301                                         numeric prefac = ex_to<numeric>(ufaclst[i+1].lcoeff(x));
2302                                         for ( int j=ftilde.size()-1; j>=0; --j ) {
2303                                                 int count = 0;
2304                                                 while ( irem(prefac, ftilde[j]) == 0 ) {
2305                                                         prefac = iquo(prefac, ftilde[j]);
2306                                                         ++count;
2307                                                 }
2308                                                 while ( irem(ex_to<numeric>(delta)*prefac, ftilde[j]) == 0 ) {
2309                                                         numeric g = gcd(prefac, ex_to<numeric>(ftilde[j]));
2310                                                         prefac = iquo(prefac, g);
2311                                                         delta = delta / (ftilde[j]/g);
2312                                                         ufaclst[i+1] = ufaclst[i+1] * (ftilde[j]/g);
2313                                                         ++count;
2314                                                 }
2315                                                 if ( count ) {
2316                                                         used_flag[j] = true;
2317                                                         D[i] = D[i] * pow(vnlst[j+1], count);
2318                                                 }
2319                                         }
2320                                         C[i] = D[i] * prefac;
2321                                 }
2322                         }
2323                         // check if something went wrong
2324                         bool some_factor_unused = false;
2325                         for ( size_t i=0; i<used_flag.size(); ++i ) {
2326                                 if ( !used_flag[i] ) {
2327                                         some_factor_unused = true;
2328                                         break;
2329                                 }
2330                         }
2331                         if ( some_factor_unused ) {
2332                                 return lst{};  // next try
2333                         }
2334                 }
2335
2336                 // multiply the remaining content of the univariate polynomial into the
2337                 // first factor
2338                 if ( delta != 1 ) {
2339                         C[0] = C[0] * delta;
2340                         ufaclst[1] = ufaclst[1] * delta;
2341                 }
2342
2343                 // set up evaluation points
2344                 EvalPoint ep;
2345                 vector<EvalPoint> epv;
2346                 auto s = syms_wox.begin();
2347                 for ( size_t i=0; i<a.size(); ++i ) {
2348                         ep.x = *s++;
2349                         ep.evalpoint = a[i].to_int();
2350                         epv.push_back(ep);
2351                 }
2352
2353                 // calc bound p^l
2354                 int maxdeg = 0;
2355                 for ( int i=1; i<=factor_count; ++i ) {
2356                         if ( ufaclst[i].degree(x) > maxdeg ) {
2357                                 maxdeg = ufaclst[i].degree(x);
2358                         }
2359                 }
2360                 cl_I B = calc_bound(u, x) * ash(cl_I(1), maxdeg+1);  // 2 * calc_bound(u,x) * 2^maxdeg
2361                 cl_I l = 1;
2362                 cl_I pl = prime;
2363                 while ( pl < B ) {
2364                         l = l + 1;
2365                         pl = pl * prime;
2366                 }
2367
2368                 // set up modular factors (mod p^l)
2369                 cl_modint_ring R = find_modint_ring(pl);
2370                 upvec modfactors(ufaclst.size()-1);
2371                 for ( size_t i=1; i<ufaclst.size(); ++i ) {
2372                         umodpoly_from_ex(modfactors[i-1], ufaclst[i], x, R);
2373                 }
2374
2375                 // try Hensel lifting
2376                 return hensel_multivar(pp, x, epv, prime, l, modfactors, C);
2377         }
2378 };
2379
2380 /** Multivariate factorization.
2381  *
2382  *  The implementation is based on the algorithm described in [Wan].
2383  *  An evaluation homomorphism (a set of integers) is determined that fulfills
2384  *  certain criteria. The evaluated polynomial is univariate and is factorized
2385  *  by factor_univariate(). The main work then is to find the correct leading
2386  *  coefficients of the univariate factors. They have to correspond to the
2387  *  factors of the (multivariate) leading coefficient of the input polynomial
2388  *  (as defined for a specific variable x). After that the Hensel lifting can be
2389  *  performed. This is done in round-robin for each x in syms until success.
2390  *
2391  *  @param[in] poly  expanded, square free polynomial
2392  *  @param[in] syms  contains the symbols in the polynomial
2393  *  @return          factorized polynomial
2394  */
2395 static ex factor_multivariate(const ex& poly, const exset& syms)
2396 {
2397         // set up one factorization context for each symbol
2398         vector<factorization_ctx> ctx_in_x;
2399         for (auto x : syms) {
2400                 exset syms_wox;  // remaining syms w/o x
2401                 copy_if(syms.begin(), syms.end(),
2402                         inserter(syms_wox, syms_wox.end()), [x](const ex& y){ return y != x; });
2403
2404                 factorization_ctx ctx = {.poly = poly, .x = x,
2405                                          .syms_wox = syms_wox};
2406
2407                 // make polynomial primitive
2408                 poly.unitcontprim(x, ctx.unit, ctx.cont, ctx.pp);
2409                 if ( !is_a<numeric>(ctx.cont) ) {
2410                         // content is a polynomial in one or more of remaining syms, let's start over
2411                         return ctx.unit * factor_sqrfree(ctx.cont) * factor_sqrfree(ctx.pp);
2412                 }
2413
2414                 // find factors of leading coefficient
2415                 ctx.vn = ctx.pp.collect(x).lcoeff(x);
2416                 ctx.vnlst = put_factors_into_vec(factor(ctx.vn));
2417
2418                 ctx.modulus = (ctx.vnlst.size() > 3) ? ctx.vnlst.size() : 3;
2419
2420                 ctx_in_x.push_back(ctx);
2421         }
2422
2423         // try an evaluation homomorphism for each context in round-robin
2424         auto ctx = ctx_in_x.begin();
2425         while ( true ) {
2426
2427                 ex res = ctx->try_next_evaluation_homomorphism();
2428
2429                 if ( res != lst{} ) {
2430                         // found the factors
2431                         ex result = ctx->cont * ctx->unit;
2432                         for ( size_t i=0; i<res.nops(); ++i ) {
2433                                 ex unit, cont, pp;
2434                                 res.op(i).unitcontprim(ctx->x, unit, cont, pp);
2435                                 result *= unit * cont * pp;
2436                         }
2437                         return result;
2438                 }
2439
2440                 // switch context for next symbol
2441                 if (++ctx == ctx_in_x.end()) {
2442                         ctx = ctx_in_x.begin();
2443                 }
2444         }
2445 }
2446
2447 /** Finds all symbols in an expression. Used by factor_sqrfree() and factor().
2448  */
2449 struct find_symbols_map : public map_function {
2450         exset syms;
2451         ex operator()(const ex& e) override
2452         {
2453                 if ( is_a<symbol>(e) ) {
2454                         syms.insert(e);
2455                         return e;
2456                 }
2457                 return e.map(*this);
2458         }
2459 };
2460
2461 /** Factorizes a polynomial that is square free. It calls either the univariate
2462  *  or the multivariate factorization functions.
2463  */
2464 static ex factor_sqrfree(const ex& poly)
2465 {
2466         // determine all symbols in poly
2467         find_symbols_map findsymbols;
2468         findsymbols(poly);
2469         if ( findsymbols.syms.size() == 0 ) {
2470                 return poly;
2471         }
2472
2473         if ( findsymbols.syms.size() == 1 ) {
2474                 // univariate case
2475                 const ex& x = *(findsymbols.syms.begin());
2476                 int ld = poly.ldegree(x);
2477                 if ( ld > 0 ) {
2478                         // pull out direct factors
2479                         ex res = factor_univariate(expand(poly/pow(x, ld)), x);
2480                         return res * pow(x,ld);
2481                 } else {
2482                         ex res = factor_univariate(poly, x);
2483                         return res;
2484                 }
2485         }
2486
2487         // multivariate case
2488         ex res = factor_multivariate(poly, findsymbols.syms);
2489         return res;
2490 }
2491
2492 /** Map used by factor() when factor_options::all is given to access all
2493  *  subexpressions and to call factor() on them.
2494  */
2495 struct apply_factor_map : public map_function {
2496         unsigned options;
2497         apply_factor_map(unsigned options_) : options(options_) { }
2498         ex operator()(const ex& e) override
2499         {
2500                 if ( e.info(info_flags::polynomial) ) {
2501                         return factor(e, options);
2502                 }
2503                 if ( is_a<add>(e) ) {
2504                         ex s1, s2;
2505                         for ( size_t i=0; i<e.nops(); ++i ) {
2506                                 if ( e.op(i).info(info_flags::polynomial) ) {
2507                                         s1 += e.op(i);
2508                                 } else {
2509                                         s2 += e.op(i);
2510                                 }
2511                         }
2512                         return factor(s1, options) + s2.map(*this);
2513                 }
2514                 return e.map(*this);
2515         }
2516 };
2517
2518 /** Iterate through explicit factors of e, call yield(f, k) for
2519  *  each factor of the form f^k.
2520  *
2521  *  Note that this function doesn't factor e itself, it only
2522  *  iterates through the factors already explicitly present.
2523  */
2524 template <typename F> void
2525 factor_iter(const ex &e, F yield)
2526 {
2527         if (is_a<mul>(e)) {
2528                 for (const auto &f : e) {
2529                         if (is_a<power>(f)) {
2530                                 yield(f.op(0), f.op(1));
2531                         } else {
2532                                 yield(f, ex(1));
2533                         }
2534                 }
2535         } else {
2536                 if (is_a<power>(e)) {
2537                         yield(e.op(0), e.op(1));
2538                 } else {
2539                         yield(e, ex(1));
2540                 }
2541         }
2542 }
2543
2544 /** This function factorizes a polynomial. It checks the arguments,
2545  *  tries a square free factorization, and then calls factor_sqrfree
2546  *  to do the hard work.
2547  *
2548  *  This function expands its argument, so for polynomials with
2549  *  explicit factors it's better to call it on each one separately
2550  *  (or use factor() which does just that).
2551  */
2552 static ex factor1(const ex& poly, unsigned options)
2553 {
2554         // check arguments
2555         if ( !poly.info(info_flags::polynomial) ) {
2556                 if ( options & factor_options::all ) {
2557                         options &= ~factor_options::all;
2558                         apply_factor_map factor_map(options);
2559                         return factor_map(poly);
2560                 }
2561                 return poly;
2562         }
2563
2564         // determine all symbols in poly
2565         find_symbols_map findsymbols;
2566         findsymbols(poly);
2567         if ( findsymbols.syms.size() == 0 ) {
2568                 return poly;
2569         }
2570         lst syms;
2571         for (auto & i : findsymbols.syms ) {
2572                 syms.append(i);
2573         }
2574
2575         // make poly square free
2576         ex sfpoly = sqrfree(poly.expand(), syms);
2577
2578         // factorize the square free components
2579         ex res = 1;
2580         factor_iter(sfpoly,
2581                 [&](const ex &f, const ex &k) {
2582                         if ( is_a<add>(f) ) {
2583                                 res *= pow(factor_sqrfree(f), k);
2584                         } else {
2585                                 // simple case: (monomial)^exponent
2586                                 res *= pow(f, k);
2587                         }
2588                 });
2589         return res;
2590 }
2591
2592 } // anonymous namespace
2593
2594 /** Interface function to the outside world. It uses factor1()
2595  *  on each of the explicitly present factors of poly.
2596  */
2597 ex factor(const ex& poly, unsigned options)
2598 {
2599         ex result = 1;
2600         factor_iter(poly,
2601                 [&](const ex &f1, const ex &k1) {
2602                         factor_iter(factor1(f1, options),
2603                                 [&](const ex &f2, const ex &k2) {
2604                                         result *= pow(f2, k1*k2);
2605                                 });
2606                 });
2607         return result;
2608 }
2609
2610 } // namespace GiNaC