我想手动复现一篇文章(DOI:10.1038/s41598-017-02750-9,第 8 页顶部)的作者在研究中使用的方法。该方法称为“ACF”(自相关函数),为此我写了几个不同的函数:
1. 一个基于 YouTube 视频(https://youtu.be/ZjaBn93YPWo?t=417)的版本,使用 ALGLIB 的 Pearson 相关系数函数;
2. 一个基于上述文章中所给公式的版本;
3. 一个基于在线 ACF 计算器页面(https://planetcalc.com/7908/)中简化公式的版本;
4. 一个基于同一页面(https://planetcalc.com/7908/)中较长公式的版本。
=> 然而,这四个函数给出的输出各不相同。不过,方法 3 的输出与我在浏览器中运行的在线计算器的输出一致:https://planetcalc.com/7884/?_d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s_
我的代码在这里:
#include <cmath>
#include <iostream>
#include <stdexcept>
#include <vector>
#define _WIN32_WINNT 0x0500
#include<windows.h>
#include "alglib/alglibinternal.h"
#include "alglib/alglibmisc.h"
#include "alglib/ap.h"
#include "alglib/dataanalysis.h"
#include "alglib/diffequations.h"
#include "alglib/fasttransforms.h"
#include "alglib/integration.h"
#include "alglib/interpolation.h"
#include "alglib/linalg.h"
#include "alglib/optimization.h"
#include "alglib/solvers.h"
#include "alglib/specialfunctions.h"
#include "alglib/statistics.h"
#include "alglib/stdafx.h"
using namespace std;
/**
 * Returns a newly allocated copy of `_arr` scaled to unit Euclidean (L2) length.
 *
 * @param _arr  Input samples; at least `_s` elements are read. Not modified.
 * @param _s    Number of samples. Zero yields an empty allocation.
 * @return      Array of `_s` doubles allocated with `new[]`; the caller owns it
 *              and must release it with `delete[]`.
 * @throws std::invalid_argument  if `_s` is negative, or if every sample is zero
 *              (a zero vector has no direction, so dividing by its magnitude
 *              would only produce NaN/inf values).
 */
double* normalize(double* _arr, int _s) {
    if (_s < 0)
        throw std::invalid_argument("normalize: negative length");
    if (_s == 0)
        return new double[0];

    double sum_sq = 0.0;
    for (int i = 0; i < _s; ++i)
        sum_sq += _arr[i] * _arr[i];

    const double mag = std::sqrt(sum_sq);
    // Validate before allocating so that nothing leaks when we throw.
    if (mag == 0.0)
        throw std::invalid_argument("normalize: zero-magnitude input cannot be normalized");

    const double mag_inv = 1.0 / mag;
    double* output = new double[_s];
    for (int i = 0; i < _s; ++i)
        output[i] = _arr[i] * mag_inv;
    return output;
}
void doACFyoutube(double* _ina, int _s)
// https://youtu.be/ZjaBn93YPWo?t=417 => the most unefficient, but understandable method
{
double* temp_x;
double* temp_y;
double* ACFoutput = new double[_s];
for(int shift = 0; shift < _s; shift++)
{
temp_x = new double[_s-shift];
temp_y = new double[_s-shift];
for(int cpy = 0; cpy < _s-shift; cpy++)
{
temp_x[cpy] = _ina[cpy];
temp_y[cpy] = _ina[cpy+shift];
}
temp_y = normalize(temp_y, _s-shift); //not sure if needed //TODO: leak
alglib::real_1d_array temp_x_alglib;
alglib::real_1d_array temp_y_alglib;
temp_x_alglib.setcontent(_s-shift, temp_x);
temp_y_alglib.setcontent(_s-shift, temp_y);
ACFoutput[shift] = alglib::pearsoncorr2(temp_x_alglib, temp_y_alglib); //Pearson product-moment correlation coefficient
delete temp_x;
delete temp_y;
}
for(int i=0; i<_s; i++)
cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}
/**
 * Prints the autocorrelation function (ACF) of a series for every lag 0 .. _s-1,
 * following the definition the author wants to reproduce from
 * DOI: 10.1038/s41598-017-02750-9 (page 8, first equation).
 *
 * With N = _s, mean = average of all samples and var = sample variance:
 *
 *     ACF(k) = 1 / ((N - k) * var) * sum_{j=0}^{N-k-1} (x[j] - mean) * (x[j+k] - mean)
 *
 * `var` is already the variance (sigma squared), so the sum is divided by it
 * once. NOTE(review): confirm against the equation in the paper.
 *
 * NOTE(review): the variance uses Bessel's correction (divide by N-1), as the
 * original code did. With that choice ACF(0) equals (N-1)/N rather than 1;
 * confirm which convention the paper uses.
 *
 * The result is NaN for every lag when `_s` is 1 or when all samples are equal
 * (zero variance).
 *
 * @param _ina  Input samples; at least `_s` elements are read. Not modified.
 * @param _s    Number of samples.
 *
 * Output: one line per lag on std::cout.
 */
void doACFgoal(double* _ina, int _s)
{
    double mean = 0.0;
    for (int a = 0; a < _s; ++a)
        mean += _ina[a];
    mean /= _s;

    double var = 0.0;
    for (int b = 0; b < _s; ++b)
        var += (_ina[b] - mean) * (_ina[b] - mean);
    var /= _s - 1;   // Bessel's correction, see NOTE(review) above

    std::vector<double> acf;
    if (_s > 0)
        acf.reserve(static_cast<std::vector<double>::size_type>(_s));

    for (int lag = 0; lag < _s; ++lag)
    {
        // Zero-based pairs (j, j + lag); the last valid j is _s - lag - 1 so
        // that j + lag never leaves the array.
        double cov_sum = 0.0;
        for (int j = 0; j < _s - lag; ++j)
            cov_sum += (_ina[j] - mean) * (_ina[j + lag] - mean);

        acf.push_back(cov_sum / (static_cast<double>(_s - lag) * var));
    }

    for (std::vector<double>::size_type i = 0; i < acf.size(); ++i)
        std::cout << " lag = " << i << "\tACF(lag) = " << acf[i] << std::endl;
}
/**
 * Prints the autocorrelation function of a series for every lag 0 .. _s-1
 * using the simplified formula from https://planetcalc.com/7908/ :
 *
 *     ACF(k) = sum_{j=0}^{N-k-1} (x[j] - mean) * (x[j+k] - mean)
 *              / sum_{j=0}^{N-1} (x[j] - mean)^2
 *
 * where N = _s and mean is the average over all samples. The denominator does
 * not depend on the lag, so it is computed once. If all samples are equal the
 * denominator is zero and every value is NaN.
 *
 * @param _ina  Input samples; at least `_s` elements are read. Not modified.
 * @param _s    Number of samples.
 *
 * Output: one line per lag on std::cout.
 */
void doACFplanetcalcCoarse(double* _ina, int _s)
{
    double mean = 0.0;
    for (int a = 0; a < _s; ++a)
        mean += _ina[a];
    mean /= _s;

    // Total sum of squared deviations: the lag-independent denominator.
    double total_sq = 0.0;
    for (int k = 0; k < _s; ++k)
        total_sq += (_ina[k] - mean) * (_ina[k] - mean);

    std::vector<double> acf;
    if (_s > 0)
        acf.reserve(static_cast<std::vector<double>::size_type>(_s));

    for (int lag = 0; lag < _s; ++lag)
    {
        double cross = 0.0;
        for (int j = 0; j < _s - lag; ++j)
            cross += (_ina[j] - mean) * (_ina[j + lag] - mean);
        acf.push_back(cross / total_sq);
    }

    for (std::vector<double>::size_type i = 0; i < acf.size(); ++i)
        std::cout << " lag = " << i << "\tACF(lag) = " << acf[i] << std::endl;
}
/**
 * Prints the autocorrelation function of a series for every lag 0 .. _s-1
 * using the longer formula from https://planetcalc.com/7908/ . For lag k, with
 * N = _s, mean1 = mean of the first N-k samples, mean2 = mean of the last N-k
 * samples:
 *
 *     ACF(k) = sum_i (x[i] - mean1) * (x[i+k] - mean2)
 *              / ( sqrt(sum_i (x[i] - mean1)^2) * sqrt(sum_i (x[i+k] - mean2)^2) )
 *
 * with i = 0 .. N-k-1. This is the Pearson correlation between the leading
 * and the trailing window. Each window's spread is measured around that
 * window's own mean.
 *
 * When a window has zero spread (always the case for the last lag, where each
 * window holds a single sample) the result is 0/0, i.e. NaN.
 *
 * @param _ina  Input samples; at least `_s` elements are read. Not modified.
 * @param _s    Number of samples.
 *
 * Output: one line per lag on std::cout.
 */
void doACFplanetcalcFine(double* _ina, int _s)
{
    std::vector<double> acf;
    if (_s > 0)
        acf.reserve(static_cast<std::vector<double>::size_type>(_s));

    for (int k = 0; k < _s; ++k)
    {
        const int window = _s - k;

        double mean1 = 0.0;   // mean of the first N-k values
        double mean2 = 0.0;   // mean of the last N-k values
        for (int a = 0; a < window; ++a)
        {
            mean1 += _ina[a];
            mean2 += _ina[a + k];
        }
        mean1 /= window;
        mean2 /= window;

        double cross = 0.0;     // sum of products of paired deviations
        double head_sq = 0.0;   // squared deviations of the leading window
        double tail_sq = 0.0;   // squared deviations of the trailing window
        for (int i = 0; i < window; ++i)
        {
            const double head_dev = _ina[i] - mean1;
            const double tail_dev = _ina[i + k] - mean2;
            cross += head_dev * tail_dev;
            head_sq += head_dev * head_dev;
            tail_sq += tail_dev * tail_dev;
        }

        acf.push_back(cross / (std::sqrt(head_sq) * std::sqrt(tail_sq)));
    }

    for (std::vector<double>::size_type i = 0; i < acf.size(); ++i)
        std::cout << " lag = " << i << "\tACF(lag) = " << acf[i] << std::endl;
}
int main()
{
//fullscreenhez
HWND hWnd = GetConsoleWindow();
ShowWindow(hWnd,SW_SHOWMAXIMIZED);
double ina[15] = {2,3,4,5,4,3,4,5,6,7,6,5,4,3,4}; //15 elem
for(int x=0; x<15; x++)
cout << ina[x] << ",";
cout << endl;
cout << endl;
// https://youtu.be/ZjaBn93YPWo?t=417 => the most unefficient, but understandable method
doACFyoutube(ina, 15); // ??? result doesn't match any other
cout << endl;
// DOI: 10.1038/s41598-017-02750-9 => page 8, first equation (my goal is to reproduce this)
doACFgoal(ina, 15); // ??? result doesn't match any other
cout << endl;
// https://planetcalc.com/7908/ (simplified formula)
doACFplanetcalcCoarse(ina, 15); //result equals to the online calculator result: https://planetcalc.com/7884/?_d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s_
cout << endl;
// https://planetcalc.com/7908/ (longer formula)
doACFplanetcalcFine(ina, 15); // ??? result doesn't match any other
return 0;
}
输出如下所示:
由于我没有他们在出版物中使用的原始数据,我只能依靠我的程序输出与其他代码输出的一致性如何。但是这些输出是不同的,我不知道为什么。您能否看一下代码并帮助我得到四个相等的输出?
(Code::Blocks 项目的压缩包在这里:https://drive.google.com/file/d/1s3SeJSiDgk-hiMazp94HfFerL582VG2K/view?usp=sharing )