在求解大数的gcd时,欧几里得算法够简单但不够快,美国数学家Derrick Henry Lehmer 的算法更快。
python内置math.gcd的算法就是基于此算法。
PyObject *
_PyLong_GCD(PyObject *aarg, PyObject *barg)
{
PyLongObject *a, *b, *c = NULL, *d = NULL, *r;
stwodigits x, y, q, s, t, c_carry, d_carry;
stwodigits A, B, C, D, T;
int nbits, k;
Py_ssize_t size_a, size_b, alloc_a, alloc_b;
digit *a_digit, *b_digit, *c_digit, *d_digit, *a_end, *b_end;
a = (PyLongObject *)aarg;
b = (PyLongObject *)barg;
size_a = Py_SIZE(a);
size_b = Py_SIZE(b);
if (-2 <= size_a && size_a <= 2 && -2 <= size_b && size_b <= 2) {
Py_INCREF(a);
Py_INCREF(b);
goto simple;
}
/* Initial reduction: make sure that 0 <= b <= a. */
a = (PyLongObject *)long_abs(a);
if (a == NULL)
returnNULL;
b = (PyLongObject *)long_abs(b);
if (b == NULL) {
Py_DECREF(a);
returnNULL;
}
if (long_compare(a, b) < 0) {
r = a;
a = b;
b = r;
}
/* We now own references to a and b */
alloc_a = Py_SIZE(a);
alloc_b = Py_SIZE(b);
/* reduce until a fits into 2 digits */while ((size_a = Py_SIZE(a)) > 2) {
nbits = bit_length_digit(a->ob_digit[size_a-1]);
/* extract top 2*PyLong_SHIFT bits of a into x, along with
corresponding bits of b into y */
size_b = Py_SIZE(b);
assert(size_b <= size_a);
if (size_b == 0) {
if (size_a < alloc_a) {
r = (PyLongObject *)_PyLong_Copy(a);
Py_DECREF(a);
}
else
r = a;
Py_DECREF(b);
Py_XDECREF(c);
Py_XDECREF(d);
return (PyObject *)r;
}
x = (((twodigits)a->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits)) |
((twodigits)a->ob_digit[size_a-2] << (PyLong_SHIFT-nbits)) |
(a->ob_digit[size_a-3] >> nbits));
y = ((size_b >= size_a - 2 ? b->ob_digit[size_a-3] >> nbits : 0) |
(size_b >= size_a - 1 ? (twodigits)b->ob_digit[size_a-2] << (PyLong_SHIFT-nbits) : 0) |
(size_b >= size_a ? (twodigits)b->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits) : 0));
/* inner loop of Lehmer's algorithm; A, B, C, D never grow
larger than PyLong_MASK during the algorithm. */
A = 1; B = 0; C = 0; D = 1;
for (k=0;; k++) {
if (y-C == 0)
break;
q = (x+(A-1))/(y-C);
s = B+q*D;
t = x-q*y;
if (s > t)
break;
x = y; y = t;
t = A+q*C; A = D; B = C; C = s; D = t;
}
if (k == 0) {
/* no progress; do a Euclidean step */if (l_mod(a, b, &r) < 0)
goto error;
Py_SETREF(a, b);
b = r;
alloc_a = alloc_b;
alloc_b = Py_SIZE(b);
continue;
}
/*
a, b = A*b-B*a, D*a-C*b if k is odd
a, b = A*a-B*b, D*b-C*a if k is even
*/if (k&1) {
T = -A; A = -B; B = T;
T = -C; C = -D; D = T;
}
if (c != NULL) {
Py_SET_SIZE(c, size_a);
}
elseif (Py_REFCNT(a) == 1) {
c = (PyLongObject*)Py_NewRef(a);
}
else {
alloc_a = size_a;
c = _PyLong_New(size_a);
if (c == NULL)
goto error;
}
if (d != NULL) {
Py_SET_SIZE(d, size_a);
}
elseif (Py_REFCNT(b) == 1 && size_a <= alloc_b) {
d = (PyLongObject*)Py_NewRef(b);
Py_SET_SIZE(d, size_a);
}
else {
alloc_b = size_a;
d = _PyLong_New(size_a);
if (d == NULL)
goto error;
}
a_end = a->ob_digit + size_a;
b_end = b->ob_digit + size_b;
/* compute new a and new b in parallel */
a_digit = a->ob_digit;
b_digit = b->ob_digit;
c_digit = c->ob_digit;
d_digit = d->ob_digit;
c_carry = 0;
d_carry = 0;
while (b_digit < b_end) {
c_carry += (A * *a_digit) - (B * *b_digit);
d_carry += (D * *b_digit++) - (C * *a_digit++);
*c_digit++ = (digit)(c_carry & PyLong_MASK);
*d_digit++ = (digit)(d_carry & PyLong_MASK);
c_carry >>= PyLong_SHIFT;
d_carry >>= PyLong_SHIFT;
}
while (a_digit < a_end) {
c_carry += A * *a_digit;
d_carry -= C * *a_digit++;
*c_digit++ = (digit)(c_carry & PyLong_MASK);
*d_digit++ = (digit)(d_carry & PyLong_MASK);
c_carry >>= PyLong_SHIFT;
d_carry >>= PyLong_SHIFT;
}
assert(c_carry == 0);
assert(d_carry == 0);
Py_INCREF(c);
Py_INCREF(d);
Py_DECREF(a);
Py_DECREF(b);
a = long_normalize(c);
b = long_normalize(d);
}
Py_XDECREF(c);
Py_XDECREF(d);
simple:
assert(Py_REFCNT(a) > 0);
assert(Py_REFCNT(b) > 0);
/* Issue #24999: use two shifts instead of ">> 2*PyLong_SHIFT" to avoid
undefined behaviour when LONG_MAX type is smaller than 60 bits */#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT/* a fits into a long, so b must too */
x = PyLong_AsLong((PyObject *)a);
y = PyLong_AsLong((PyObject *)b);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
x = PyLong_AsLongLong((PyObject *)a);
y = PyLong_AsLongLong((PyObject *)b);
#else# error"_PyLong_GCD"#endif
x = Py_ABS(x);
y = Py_ABS(y);
Py_DECREF(a);
Py_DECREF(b);
/* usual Euclidean algorithm for longs */while (y != 0) {
t = y;
y = x % y;
x = t;
}
#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFTreturn PyLong_FromLong(x);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFTreturn PyLong_FromLongLong(x);
#else# error"_PyLong_GCD"#endif
error:
Py_DECREF(a);
Py_DECREF(b);
Py_XDECREF(c);
Py_XDECREF(d);
returnNULL;
}
参考:
https://stackoverflow.com/questions/2962640/what-algorithm-does-python-employ-in-fractions-gcd
Lehmer's GCD algorithm, named after Derrick Henry Lehmer , is a fast GCD algorithm , an improvement on the simpler but slower Euclidean algorithm . It is mainly used for big integers that have a representation as a string of digits relative to some chosen numeral system base , say β = 1000 or β = 232.
Algorithm[ edit]
Lehmer noted that most of the quotients from each step of the division part of the standard algorithm are small. (For example, Knuth observed that the quotients 1, 2, and 3 comprise 67.7% of all quotients. [1] ) Those small quotients can be identified from only a few leading digits. Thus the algorithm starts by splitting off those leading digits and computing the sequence of quotients as long as it is correct.
Say we want to obtain the GCD of the two integers a and b. Let a ≥ b.
If b contains only one digit (in the chosen base, say β = 1000 or β = 2 32), use some other method, such as the Euclidean algorithm, to obtain the result.
If a and b differ in the length of digits, perform a division so that a and b are equal in length, with length equal to m.
Outer loop: Iterate until one of a or b is zero:
Decrease m by one. Let x be the leading (most significant) digit in a, x = a div β m and y the leading digit in b, y = b div β m.
Initialize a 2 by 3 matrix
[������]
to an extended identity matrix [10�01�],
and perform the euclidean algorithm simultaneously on the pairs ( x + A, y + C) and ( x + B, y + D), until the quotients differ. That is, iterate as an inner loop:
Compute the quotients w 1 of the long divisions of ( x + A) by ( y + C) and w 2 of ( x + B) by ( y + D) respectively. Also let w be the (not computed) quotient from the current long division in the chain of long divisions of the euclidean algorithm.
If w 1 ≠ w 2, then break out of the inner iteration. Else set w to w 1 (or w 2).
Replace the current matrix
[������]
with the matrix product
[011−�]⋅[������]=[����−���−���−��]
according to the matrix formulation of the extended euclidean algorithm.
If B ≠ 0, go to the start of the inner loop.
If B = 0, we have reached a deadlock; perform a normal step of the euclidean algorithm with a and b, and restart the outer loop.
Set a to aA + bB and b to Ca + Db (again simultaneously). This applies the steps of the euclidean algorithm that were performed on the leading digits in compressed form to the long integers a and b. If b ≠ 0 go to the start of the outer loop.