c++ - 如何加快系列生成？

Question

该问题需要生成n-th类似于斐波那契数列的数列元素。但是，它有点棘手，因为n它非常大（1 <= n <= 10^9）。答案然后模1000000007。序列定义如下：
在此处输入图像描述

使用生成函数，我得到以下公式：在此处输入图像描述

如果我使用序列方法，那么答案可以是模数，但运行速度非常慢。事实上，我得到了time limit exceed很多次。我还尝试使用表来预生成一些初始值（缓存），但速度仍然不够快。此外，array/vector与 10^9 相比，我可以在 (C++) 中存储的最大元素数量太少，所以我猜这种方法也行不通。
如果我使用直接公式，那么它运行得非常快，但只是因为n它很小。对于n大的，双精度数将被截断，而且我将无法用该数字修改我的答案，因为模数仅适用于整数。
我没有想法，我认为必须有一个非常好的技巧来解决这个问题，不幸的是我想不出一个。任何想法将不胜感激。

这是我的初步方法：

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    cin >> test_cases;

    map<ull, ull> cache;
    ull n;
    while (test_cases--) {
        cin >> n;
        cout << count_fair_coins_by_generating_function(n) << endl;
        cout << count_fair_coins(n) << endl;
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
}

更新由于比赛结束了，我根据tskuzzy想法为有兴趣的人发布了我的解决方案。再次感谢tskuzzy。您可以在此处查看原始问题陈述：http: //www.codechef.com/problems/CSUMD
首先，您需要计算出这些1 coin和的概率2 coin，然后获取一些初始值以获得序列。完整的解决方案在这里：

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

const ull special_prime = 1000000007;

/*
    Using generating function for the recurrence:
           | 1                     if n = 1
    a_n =  | 3                     if n = 2
           | 2a_{n-1} + 2a_{n-2}     if n > 2

    This method is probably the fastest one but it won't work 
    because when n is large, double just can't afford it. Plus,
    using this formula, we can't apply mod for floating point number.
    1 <= n <= 21
*/
ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

/*
    Naive approach, it works but very slow. 
    Useful for testing.
*/
ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

struct matrix_2_by_2 {
    ull m[2][2];
    ull a[2][2];
    ull b[2][2];

    explicit matrix_2_by_2(ull a00, ull a01, ull a10, ull a11) {
        m[0][0] = a00;
        m[0][1] = a01;
        m[1][0] = a10;
        m[1][1] = a11;
    }

    matrix_2_by_2 operator *(const matrix_2_by_2& rhs) const {
        matrix_2_by_2 result(0, 0, 0, 0);
        result.m[0][0] = (m[0][0] * rhs.m[0][0]) + (m[0][1] * rhs.m[1][0]);
        result.m[0][1] = (m[0][0] * rhs.m[0][1]) + (m[0][1] * rhs.m[1][1]);
        result.m[1][0] = (m[1][0] * rhs.m[0][0]) + (m[1][1] * rhs.m[1][0]);
        result.m[1][1] = (m[1][0] * rhs.m[0][1]) + (m[1][1] * rhs.m[1][1]);
        return result;
    }

    void square() {
        a[0][0] = b[0][0] = m[0][0];
        a[0][1] = b[0][1] = m[0][1];
        a[1][0] = b[1][0] = m[1][0];
        a[1][1] = b[1][1] = m[1][1];

        m[0][0] = (a[0][0] * b[0][0]) + (a[0][1] * b[1][0]);
        m[0][1] = (a[0][0] * b[0][1]) + (a[0][1] * b[1][1]);
        m[1][0] = (a[1][0] * b[0][0]) + (a[1][1] * b[1][0]);
        m[1][1] = (a[1][0] * b[0][1]) + (a[1][1] * b[1][1]);
    }

    void mod(ull n) {
        m[0][0] %= n;
        m[0][1] %= n;
        m[1][0] %= n;
        m[1][1] %= n;
    }

    /*
        exponentiation by squaring algorithm
                | 1                    if n = 0 
                | (1/x)^n              if n < 0 
        x^n =   | x.x^({(n-1)/2})^2    if n is odd
                | (x^{n/2})^2          if n is even

        The following algorithm calculate a^p % m
        int modulo(int a, int p, int m){
            long long x = 1;
            long long y = a; 

            while (p > 0) {
                if (p % 2 == 1){
                    x = (x * y) % m;
                }

                // squaring the base
                y = (y * y) % m; 
                p /= 2;
            }

            return x % c;
        }

        To apply for matrix, we need an identity which is
        equivalent to 1, then perform multiplication for matrix 
        in similar manner. Thus the algorithm is defined 
        as follows:
    */
    void operator ^=(ull p) {
        matrix_2_by_2 identity(1, 0, 0, 1);

        while (p > 0) {
            if (p % 2) {
                identity = operator*(identity);
                identity.mod(special_prime);
            }

            this->square();
            this->mod(special_prime);
            p /= 2;
        }

        m[0][0] = identity.m[0][0];
        m[0][1] = identity.m[0][1];
        m[1][0] = identity.m[1][0];
        m[1][1] = identity.m[1][1];
    }

    friend
    ostream& operator <<(ostream& out, const matrix_2_by_2& rhs) {
        out << rhs.m[0][0] << ' ' << rhs.m[0][1] << '\n';
        out << rhs.m[1][0] << ' ' << rhs.m[1][1] << '\n';
        return out;
    }
};

/*
    |a_{n+2}| = |2 2|^n  x |3| 
    |a_{n+1}|   |1 0|      |1|
*/
ull count_fair_coins_by_matrix(ull n) {
    if (n == 1) {
        return 1;
    } else {
        matrix_2_by_2 m(2, 2, 1, 0);
        m ^= (n - 1);
        return (m.m[1][0] * 3 + m.m[1][1]) % 1000000007;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    scanf("%d", &test_cases);

    ull n;
    while (test_cases--) {
        scanf("%llu", &n);
        printf("%d\n", count_fair_coins_by_matrix(n));
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
}

score 17 · Accepted Answer

您可以根据矩阵指数编写序列的项：

在此处输入图像描述

可以通过平方使用取幂来快速评估。这导致了一个O(log n)解决方案，该解决方案应该在时间限制内很好地解决问题。

仅供将来参考，如果您需要与大数相乘（在这种情况下不适用，因为答案取模 1000000007），您应该查看Karatsuba algorithm。这为您提供了二次时间乘法。

score 1 · Accepted Answer

只是在这里思考，但看看 Duff 的 count_fair_coins 函数设备，因为它会自动展开循环以加速该函数。

在生成函数中预先计算 sqrt 似乎是加快速度的最简单方法。这将减少到只有一个 pow 调用和常量乘法。除了预先计算 sqrt 的另一种加速它的方法是删除除法并使用逆乘法，尽管非常轻微的优化它可能有助于在 n 非常大时加快速度。

c++ - 如何加快系列生成？

2 回答 2

Related

Reference