1

我正在尝试基于Paciorek 和 Schervish, 2005的方程 (5,6) 在 Python 中实现非平稳高斯协方差函数。见附图:在此处输入图像描述

我已经生成了一些我认为做正确的事情的代码,尽管它是非常低效的,因为它以C元素方式填充矩阵。请参见下面的合成示例:

import numpy as np
from scipy.spatial.distance import cdist

np.random.seed(1)

n = 100
x = np.random.rand(n,2)
length_scales = np.random.rand(n,2)
sigma2 = 1
C = np.zeros((n,n))
for i in range(n):
    Sigmai = np.diag(length_scales[i,:])
    xi = np.atleast_2d(x[i,:]).T
    for j in range(n):
        Sigmaj = np.diag(length_scales[j,:])
        xj = np.atleast_2d(x[j,:]).T
        Qij = cdist(np.dot(np.diag(1/(((Sigmai+Sigmaj)/2).diagonal())),xi).T,\
                    np.dot(np.diag(1/(((Sigmai+Sigmaj)/2).diagonal())),xj).T,'sqeuclidean')
        C[i,j] = sigma2 * np.prod(Sigmai.diagonal())**.25 * np.prod(Sigmaj.diagonal())**.25 *\
                    np.prod(((Sigmai+Sigmaj)/2).diagonal())**-.5 * np.exp(-Qij)

我意识到我可以通过填充 的下三角形来稍微提高效率C,但是如果很大,n这仍然很慢......

我的问题是,是否可以重写上面的代码,这样我就不必C迭代计算了?

4

1 回答 1

1

请参阅一个示例 usingnp.meshgrid作为避免使用 for 循环的方法,以及每种情况的运行时间比较:

for 循环

import numpy as np
from scipy.spatial.distance import cdist
import time

np.random.seed(1)

n = 100
x = np.random.rand(n,2)
length_scales = np.random.rand(n,2)
sigma2 = 1

t = time.time()
C1 = np.zeros((n,n))
for i in range(n):
    Sigmai = np.diag(length_scales[i,:])
    xi = np.atleast_2d(x[i,:]).T
    for j in range(n):
        Sigmaj = np.diag(length_scales[j,:])
        xj = np.atleast_2d(x[j,:]).T
        Qij = cdist(np.dot(np.diag(1/(((Sigmai+Sigmaj)/2).diagonal())),xi).T,\
                    np.dot(np.diag(1/(((Sigmai+Sigmaj)/2).diagonal())),xj).T,'sqeuclidean')
        C1[i,j] = sigma2 * np.prod(Sigmai.diagonal())**.25 * np.prod(Sigmaj.diagonal())**.25 *\
                    np.prod(((Sigmai+Sigmaj)/2).diagonal())**-.5 * np.exp(-Qij)
print('for loops:',time.time()-t,'seconds')

np.meshgrid

t = time.time()
Sigma = np.prod(length_scales,axis=1)**.25
length_scales_x1,length_scales_x2 = np.meshgrid(length_scales[:,0],length_scales[:,0])
length_scales_y1,length_scales_y2 = np.meshgrid(length_scales[:,1],length_scales[:,1])
length_mean = np.array([(length_scales_x1+length_scales_x2)/2,(length_scales_y1+length_scales_y2)/2]).transpose(1,2,0)
Sigma_i,Sigma_j = np.meshgrid(Sigma,Sigma)
Sigma_ij = np.prod(length_mean,2)**-.5
        
x1,x2 = np.meshgrid(x[:,0],x[:,0])
y1,y2 = np.meshgrid(x[:,1],x[:,1])
xi = np.reshape(np.array([x1,y1]).transpose(1,2,0)/length_mean,(x.shape[0]*x.shape[0],2))
xj = np.reshape(np.array([x2,y2]).transpose(1,2,0)/length_mean,(x.shape[0]*x.shape[0],2))
Qij = np.reshape((xi[:,0]-xj[:,0])**2 + (xi[:,1]-xj[:,1])**2,(x.shape[0],x.shape[0]))
C2 = sigma2 * Sigma_i * Sigma_j * Sigma_ij * np.exp(-Qij)

print('meshgrids:',time.time()-t,'seconds')
print(np.isclose(C1,C2,atol=1e-12))

打印:

for loops: 0.6633138656616211 seconds
meshgrids: 0.0023801326751708984 seconds
[[ True  True  True ...  True  True  True]
[ True  True  True ...  True  True  True]
[ True  True  True ...  True  True  True]
...
[ True  True  True ...  True  True  True]
[ True  True  True ...  True  True  True]
[ True  True  True ...  True  True  True]]
于 2020-10-06T10:38:30.403 回答