【C】Lehmer‘s GCD algorithm

在求解大数的gcd时,欧几里得算法够简单但不够快,美国数学家Derrick Henry Lehmer 的算法更快。

python内置math.gcd的算法就是基于此算法。

PyObject *
_PyLong_GCD(PyObject *aarg, PyObject *barg)
{
    PyLongObject *a, *b, *c = NULL, *d = NULL, *r;
    stwodigits x, y, q, s, t, c_carry, d_carry;
    stwodigits A, B, C, D, T;
    int nbits, k;
    Py_ssize_t size_a, size_b, alloc_a, alloc_b;
    digit *a_digit, *b_digit, *c_digit, *d_digit, *a_end, *b_end;

    a = (PyLongObject *)aarg;
    b = (PyLongObject *)barg;
    size_a = Py_SIZE(a);
    size_b = Py_SIZE(b);
    if (-2 <= size_a && size_a <= 2 && -2 <= size_b && size_b <= 2) {
        Py_INCREF(a);
        Py_INCREF(b);
        goto simple;
    }

    /* Initial reduction: make sure that 0 <= b <= a. */
    a = (PyLongObject *)long_abs(a);
    if (a == NULL)
        returnNULL;
    b = (PyLongObject *)long_abs(b);
    if (b == NULL) {
        Py_DECREF(a);
        returnNULL;
    }
    if (long_compare(a, b) < 0) {
        r = a;
        a = b;
        b = r;
    }
    /* We now own references to a and b */

    alloc_a = Py_SIZE(a);
    alloc_b = Py_SIZE(b);
    /* reduce until a fits into 2 digits */while ((size_a = Py_SIZE(a)) > 2) {
        nbits = bit_length_digit(a->ob_digit[size_a-1]);
        /* extract top 2*PyLong_SHIFT bits of a into x, along with
           corresponding bits of b into y */
        size_b = Py_SIZE(b);
        assert(size_b <= size_a);
        if (size_b == 0) {
            if (size_a < alloc_a) {
                r = (PyLongObject *)_PyLong_Copy(a);
                Py_DECREF(a);
            }
            else
                r = a;
            Py_DECREF(b);
            Py_XDECREF(c);
            Py_XDECREF(d);
            return (PyObject *)r;
        }
        x = (((twodigits)a->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits)) |
             ((twodigits)a->ob_digit[size_a-2] << (PyLong_SHIFT-nbits)) |
             (a->ob_digit[size_a-3] >> nbits));

        y = ((size_b >= size_a - 2 ? b->ob_digit[size_a-3] >> nbits : 0) |
             (size_b >= size_a - 1 ? (twodigits)b->ob_digit[size_a-2] << (PyLong_SHIFT-nbits) : 0) |
             (size_b >= size_a ? (twodigits)b->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits) : 0));

        /* inner loop of Lehmer's algorithm; A, B, C, D never grow
           larger than PyLong_MASK during the algorithm. */
        A = 1; B = 0; C = 0; D = 1;
        for (k=0;; k++) {
            if (y-C == 0)
                break;
            q = (x+(A-1))/(y-C);
            s = B+q*D;
            t = x-q*y;
            if (s > t)
                break;
            x = y; y = t;
            t = A+q*C; A = D; B = C; C = s; D = t;
        }

        if (k == 0) {
            /* no progress; do a Euclidean step */if (l_mod(a, b, &r) < 0)
                goto error;
            Py_SETREF(a, b);
            b = r;
            alloc_a = alloc_b;
            alloc_b = Py_SIZE(b);
            continue;
        }

        /*
          a, b = A*b-B*a, D*a-C*b if k is odd
          a, b = A*a-B*b, D*b-C*a if k is even
        */if (k&1) {
            T = -A; A = -B; B = T;
            T = -C; C = -D; D = T;
        }
        if (c != NULL) {
            Py_SET_SIZE(c, size_a);
        }
        elseif (Py_REFCNT(a) == 1) {
            c = (PyLongObject*)Py_NewRef(a);
        }
        else {
            alloc_a = size_a;
            c = _PyLong_New(size_a);
            if (c == NULL)
                goto error;
        }

        if (d != NULL) {
            Py_SET_SIZE(d, size_a);
        }
        elseif (Py_REFCNT(b) == 1 && size_a <= alloc_b) {
            d = (PyLongObject*)Py_NewRef(b);
            Py_SET_SIZE(d, size_a);
        }
        else {
            alloc_b = size_a;
            d = _PyLong_New(size_a);
            if (d == NULL)
                goto error;
        }
        a_end = a->ob_digit + size_a;
        b_end = b->ob_digit + size_b;

        /* compute new a and new b in parallel */
        a_digit = a->ob_digit;
        b_digit = b->ob_digit;
        c_digit = c->ob_digit;
        d_digit = d->ob_digit;
        c_carry = 0;
        d_carry = 0;
        while (b_digit < b_end) {
            c_carry += (A * *a_digit) - (B * *b_digit);
            d_carry += (D * *b_digit++) - (C * *a_digit++);
            *c_digit++ = (digit)(c_carry & PyLong_MASK);
            *d_digit++ = (digit)(d_carry & PyLong_MASK);
            c_carry >>= PyLong_SHIFT;
            d_carry >>= PyLong_SHIFT;
        }
        while (a_digit < a_end) {
            c_carry += A * *a_digit;
            d_carry -= C * *a_digit++;
            *c_digit++ = (digit)(c_carry & PyLong_MASK);
            *d_digit++ = (digit)(d_carry & PyLong_MASK);
            c_carry >>= PyLong_SHIFT;
            d_carry >>= PyLong_SHIFT;
        }
        assert(c_carry == 0);
        assert(d_carry == 0);

        Py_INCREF(c);
        Py_INCREF(d);
        Py_DECREF(a);
        Py_DECREF(b);
        a = long_normalize(c);
        b = long_normalize(d);
    }
    Py_XDECREF(c);
    Py_XDECREF(d);

simple:
    assert(Py_REFCNT(a) > 0);
    assert(Py_REFCNT(b) > 0);
/* Issue #24999: use two shifts instead of ">> 2*PyLong_SHIFT" to avoid
   undefined behaviour when LONG_MAX type is smaller than 60 bits */#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT/* a fits into a long, so b must too */
    x = PyLong_AsLong((PyObject *)a);
    y = PyLong_AsLong((PyObject *)b);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
    x = PyLong_AsLongLong((PyObject *)a);
    y = PyLong_AsLongLong((PyObject *)b);
#else# error"_PyLong_GCD"#endif
    x = Py_ABS(x);
    y = Py_ABS(y);
    Py_DECREF(a);
    Py_DECREF(b);

    /* usual Euclidean algorithm for longs */while (y != 0) {
        t = y;
        y = x % y;
        x = t;
    }
#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFTreturn PyLong_FromLong(x);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFTreturn PyLong_FromLongLong(x);
#else# error"_PyLong_GCD"#endif

error:
    Py_DECREF(a);
    Py_DECREF(b);
    Py_XDECREF(c);
    Py_XDECREF(d);
    returnNULL;
}

参考:

https://stackoverflow.com/questions/2962640/what-algorithm-does-python-employ-in-fractions-gcd

Lehmer's GCD algorithm, named after Derrick Henry Lehmer , is a fast GCD algorithm , an improvement on the simpler but slower Euclidean algorithm . It is mainly used for big integers that have a representation as a string of digits relative to some chosen numeral system base , say β = 1000 or β = 232.
Algorithm[ edit]

Lehmer noted that most of the quotients from each step of the division part of the standard algorithm are small. (For example, Knuth observed that the quotients 1, 2, and 3 comprise 67.7% of all quotients. [1] ) Those small quotients can be identified from only a few leading digits. Thus the algorithm starts by splitting off those leading digits and computing the sequence of quotients as long as it is correct.
Say we want to obtain the GCD of the two integers a and b. Let ab.
If b contains only one digit (in the chosen base, say β = 1000 or β = 2 32), use some other method, such as the Euclidean algorithm, to obtain the result.
If a and b differ in the length of digits, perform a division so that a and b are equal in length, with length equal to m.
Outer loop: Iterate until one of a or b is zero:
Decrease m by one. Let x be the leading (most significant) digit in a, x = a div β m and y the leading digit in b, y = b div β m.
Initialize a 2 by 3 matrix
[������]

to an extended identity matrix [10�01�],

and perform the euclidean algorithm simultaneously on the pairs ( x + A, y + C) and ( x + B, y + D), until the quotients differ. That is, iterate as an inner loop:
Compute the quotients w 1 of the long divisions of ( x + A) by ( y + C) and w 2 of ( x + B) by ( y + D) respectively. Also let w be the (not computed) quotient from the current long division in the chain of long divisions of the euclidean algorithm.
If w 1w 2, then break out of the inner iteration. Else set w to w 1 (or w 2).
Replace the current matrix
[������]

with the matrix product
[011−�]⋅[������]=[����−���−���−��]

according to the matrix formulation of the extended euclidean algorithm.
If B ≠ 0, go to the start of the inner loop.
If B = 0, we have reached a deadlock; perform a normal step of the euclidean algorithm with a and b, and restart the outer loop.
Set a to aA + bB and b to Ca + Db (again simultaneously). This applies the steps of the euclidean algorithm that were performed on the leading digits in compressed form to the long integers a and b. If b ≠ 0 go to the start of the outer loop.
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值