我正在尝试使用 PyMC3 估计 3 个补丁高斯混合模型的协方差。均值和协方差完全未知,权重为[1,1,1]
。对于均值估计,可以使用tt.stack([vx,vy])
构建适当的数量。但是对于协方差,我想[sigma_x, sigma_y, rho]
用作随机变量。我尝试用tt.stack
代码来构建对应的协方差张量
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt
k = 3 # number of patch
ndata = 500 # number of data
centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)
data = []
for i in v:
data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))
# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])
# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9
with pm.Model() as model:
# cluster sizes
p = tt.constant(np.array([1.,1.,1.]))
# cluster centers
vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
category = pm.Categorical('category',
p=p,
shape=ndata)
means = tt.stack([vx, vy], axis=-1)
m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]
cov = tt.stack([m1, m2], axis=-1)
points = pm.MvNormal('obs',
mu=means[category],
cov=cov[category],
observed=data)
# sampling
with model:
step1 = pm.Metropolis()
step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
tr = pm.sample(100)
# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()
但它引发了错误:
Traceback (most recent call last):
File "/Users/huox/Downloads/t1108.py", line 51, in <module>
observed=data)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
dist = cls.dist(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
dist.__init__(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
lower=lower, *args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
raise ValueError('cov must be two dimensional.')
ValueError: cov must be two dimensional.
我该怎么做才能解决这个问题?还有其他有效的方法来估计协方差矩阵吗?