statistics - 如何在 C++ 中正确计算 ACF？

Question

我想手动复制一篇文章的作者在他们的研究中使用的方法（DOI：10.1038/s41598-017-02750-9（第 8 页。顶部））。它被称为“ACF”，所以我写了不同的函数：

1、一个基于youtube视频的版本（https://youtu.be/ZjaBn93YPWo?t=417）使用Alglibs Pearson相关系数函数

2，然后根据上面提到的文章中描述的公式的另一个版本

3，然后是基于在线 ACF 计算器页面 ( https://planetcalc.com/7908/ )中描述的简化公式的另一个版本

4，然后是基于那里描述的更长公式的版本（https://planetcalc.com/7908/）

=> 然而，所有这些都会给出不同的输出。但是，方法 3. 与在我的浏览器中运行的在线计算器的输出一致：https://planetcalc.com/7884/? d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s

我的代码在这里：

#include <iostream>

#define _WIN32_WINNT 0x0500
#include<windows.h>
//#include <cmath>

#include "alglib/alglibinternal.h"
#include "alglib/alglibmisc.h"
#include "alglib/ap.h"
#include "alglib/dataanalysis.h"
#include "alglib/diffequations.h"
#include "alglib/fasttransforms.h"
#include "alglib/integration.h"
#include "alglib/interpolation.h"
#include "alglib/linalg.h"
#include "alglib/optimization.h"
#include "alglib/solvers.h"
#include "alglib/specialfunctions.h"
#include "alglib/statistics.h"
#include "alglib/stdafx.h"

using namespace std;


double* normalize(double* _arr, int _s) {
    double* output = new double[_s];
    double mod = 0.0;

    for (size_t i = 0; i < _s; ++i)
        mod += _arr[i] * _arr[i];

    double mag = sqrt(mod); //TODO: if 0, throw exc
    double mag_inv = 1.0 / mag;

    for (size_t i = 0; i < _s; ++i)
        output[i] = _arr[i] * mag_inv;

    return output;
}

void doACFyoutube(double* _ina, int _s)
// https://youtu.be/ZjaBn93YPWo?t=417   => the most unefficient, but understandable method
{
    double* temp_x;
    double* temp_y;

    double* ACFoutput = new double[_s];
    for(int shift = 0; shift < _s; shift++)
    {
        temp_x = new double[_s-shift];
        temp_y = new double[_s-shift];

        for(int cpy = 0; cpy < _s-shift; cpy++)
        {
            temp_x[cpy] = _ina[cpy];
            temp_y[cpy] = _ina[cpy+shift];
        }

        temp_y = normalize(temp_y, _s-shift); //not sure if needed //TODO: leak

        alglib::real_1d_array temp_x_alglib;
        alglib::real_1d_array temp_y_alglib;

        temp_x_alglib.setcontent(_s-shift, temp_x);
        temp_y_alglib.setcontent(_s-shift, temp_y);

        ACFoutput[shift] = alglib::pearsoncorr2(temp_x_alglib, temp_y_alglib); //Pearson product-moment correlation coefficient

        delete temp_x;
        delete temp_y;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFgoal(double* _ina, int _s)
// DOI: 10.1038/s41598-017-02750-9    => page 8, first equation (my goal is to reproduce this)
{
    double mean = 0; //mean
    for(int a = 0; a < _s; a++ )
      mean += _ina[a];
    mean /= _s;

    double var = 0; //variance
    for(int b = 0; b < _s; b++ )
      var += (_ina[b]-mean)*(_ina[b]-mean);
    var /= _s-1; //needed? (-1) a.k.a. Bessell's correction ?

    double* ACFoutput = new double[_s];
    for(int i = 0; i < _s; i++)
    {
        double temp_sum = 0;

        for(int j = 1; j <= _s-i; j++)
            temp_sum += (_ina[j]-mean)*(_ina[j+i]-mean);

        ACFoutput[i] = (double)1/(((double)_s-(double)i)*var*var) * temp_sum;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFplanetcalcCoarse(double* _ina, int _s)
// https://planetcalc.com/7908/
{
    double mean = 0; //mean
    for(int a = 0; a < _s; a++ )
      mean += _ina[a];
    mean /= _s;

    double* ACFoutput = new double[_s];
    for(int i = 0; i < _s; i++)
    {
        double temp_sum1 = 0;
        double temp_sum2 = 0;

        for(int j = 0; j < _s-i; j++)
            temp_sum1 += (_ina[j]-mean)*(_ina[j+i]-mean);

        for(int k = 0; k < _s; k++)
            temp_sum2 += (_ina[k]-mean)*(_ina[k]-mean);

        ACFoutput[i] = temp_sum1 / temp_sum2;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFplanetcalcFine(double* _ina, int _s)
// https://planetcalc.com/7908/     => gives different output than the online calculator script, even though uses the longer formula described there
{
    double* ACFoutput = new double[_s];
    for(int k = 0; k < _s; k++)
    {
        double mean1 = 0;    //mean of first N-k values
        for(int a = 0; a < _s-k; a++ )
          mean1 += _ina[a];
        mean1 /= _s-k;
 //       cout << "\t mean of first N-" << k << " values = " << mean1 << endl;

        double mean2 = 0;    //mean of last N-k values
        for(int a = k; a < _s; a++ )
          mean2 += _ina[a];
        mean2 /= _s-k;
 //       cout << "\t mean of last N-" << k << " values = " << mean2 << endl;


        double temp_sum1 = 0;
        double temp_sum2 = 0;
        double temp_sum3 = 0;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum1 += (_ina[i]-mean1)*(_ina[i+k]-mean2);
 //           cout << "\t\t temp_sum1 (" << i << ") = " << temp_sum1 << endl;
        }
//        cout << "\t temp_sum1 = " << temp_sum1 << endl;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum2 += (_ina[i]-mean2)*(_ina[i]-mean2); //pow2
 //           cout << "\t\t temp_sum2 (" << i << ") = " << temp_sum2 << endl;
        }
 //       cout << "\t temp_sum2 = " << temp_sum2 << endl;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum3 += (_ina[i+k]-mean2)*(_ina[i+k]-mean2); //pow2
 //           cout << "\t\t temp_sum3 (" << i << ") = " << temp_sum3 << endl;
        }
 //       cout << "\t temp_sum3 = " << temp_sum3 << endl;

        ACFoutput[k] = temp_sum1 / (sqrt(temp_sum2)*sqrt(temp_sum3));
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

int main()
{
    //fullscreenhez
    HWND hWnd = GetConsoleWindow();
    ShowWindow(hWnd,SW_SHOWMAXIMIZED);

    double ina[15] = {2,3,4,5,4,3,4,5,6,7,6,5,4,3,4}; //15 elem
    for(int x=0; x<15; x++)
        cout << ina[x] << ",";
    cout << endl;

    cout << endl;
    // https://youtu.be/ZjaBn93YPWo?t=417   => the most unefficient, but understandable method
    doACFyoutube(ina, 15);          // ??? result doesn't match any other

    cout << endl;
    // DOI: 10.1038/s41598-017-02750-9    => page 8, first equation (my goal is to reproduce this)
    doACFgoal(ina, 15);             // ??? result doesn't match any other

    cout << endl;
    // https://planetcalc.com/7908/ (simplified formula)
    doACFplanetcalcCoarse(ina, 15); //result equals to the online calculator result: https://planetcalc.com/7884/?_d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s_

    cout << endl;
    // https://planetcalc.com/7908/ (longer formula)
    doACFplanetcalcFine(ina, 15);   // ??? result doesn't match any other

    return 0;
}

输出如下所示：

由于我没有他们在出版物中使用的原始数据，我只能依靠我的程序输出与其他代码输出的一致性如何。但是这些输出是不同的，我不知道为什么。您能否看一下代码并帮助我得到四个相等的输出？

（代码块项目压缩在这里： https ://drive.google.com/file/d/1s3SeJSiDgk-hiMazp94HfFerL582VG2K/view?usp=sharing ）

statistics - 如何在 C++ 中正确计算 ACF？

0 回答 0

Related

Reference