SciPy 分布的fit
方法提供了参数的最大似然估计。你是对的,它只提供了适合形状、位置和比例的功能。(其实你说的是形状和尺度,但是 SciPy 还包括一个位置参数。有时这被称为三参数 gamma 分布。)
对于 SciPy 中的大多数分布,该fit
方法使用数值优化器来最小化该方法中定义的负对数似然nnlf
。fit
您可以使用几行代码自己完成此操作,而不是使用该方法。这允许您创建一个只有一个参数的目标函数,比如 shape k
,在该函数中, set theta = mean/k
,其中mean
是所需的均值,并调用gamma.nnlf
以评估负对数似然。这是您可以做到的一种方法:
import numpy as np
from scipy.stats import gamma
from scipy.optimize import fmin
def nll(k, mean, x):
return gamma.nnlf(np.array([k[0], 0, mean/k[0]]), x)
observations = [17.6, 24.9, 3.9, 17.6, 11.8, 10.4, 4.1, 11.7, 5.7, 1.6,
8.6, 12.9, 5.7, 8.0, 7.4, 1.2, 11.3, 10.4, 1.0, 1.9,
6.0, 9.3, 13.3, 5.4, 9.1, 4.0, 12.8, 11.1, 23.1, 4.2,
7.9, 11.1, 10.0, 3.4, 27.8, 7.2, 14.9, 2.9, 5.5, 7.0,
3.9, 12.3, 10.6, 22.1, 5.0, 4.1, 21.3, 15.9, 34.5, 8.1,
19.6, 10.8, 13.4, 22.8, 27.6, 6.8, 5.9, 9.0, 7.1, 21.2,
1.0, 14.6, 16.9, 1.0, 6.5, 2.9, 7.1, 14.1, 15.2, 7.8,
9.0, 4.9, 2.1, 9.5, 5.6, 11.1, 7.7, 18.3, 3.8, 11.0,
4.2, 12.5, 8.4, 3.2, 4.0, 3.8, 2.0, 24.7, 24.6, 3.4,
4.3, 3.2, 7.6, 8.3, 14.5, 8.3, 8.4, 14.0, 1.0, 9.0]
# This is the desired mean of the distribution.
mean = 11
# Initial guess for the shape parameter.
k0 = 3.0
opt = fmin(nll, k0, args=(mean, np.array(observations)),
xtol=1e-11, disp=False)
k_opt = opt[0]
theta_opt = mean / k_opt
print(f"k_opt: {k_opt:9.7f}")
print(f"theta_opt: {theta_opt:9.7f}")
此脚本打印
k_opt: 1.9712604
theta_opt: 5.5801861
或者,可以修改维基百科中显示的对数似然极值的一阶条件,以便只有一个参数k
。然后,极值的条件可以实现为一个标量方程,其根可以通过 找到scipy.optimize.fsolve
。以下是使用此技术的上述脚本的变体。
import numpy as np
from scipy.special import digamma
from scipy.optimize import fsolve
def first_order_eq(k, mean, x):
mean_logx = np.mean(np.log(x))
return (np.log(k) - digamma(k) + mean_logx - np.mean(x)/mean
- np.log(mean) + 1)
observations = [17.6, 24.9, 3.9, 17.6, 11.8, 10.4, 4.1, 11.7, 5.7, 1.6,
8.6, 12.9, 5.7, 8.0, 7.4, 1.2, 11.3, 10.4, 1.0, 1.9,
6.0, 9.3, 13.3, 5.4, 9.1, 4.0, 12.8, 11.1, 23.1, 4.2,
7.9, 11.1, 10.0, 3.4, 27.8, 7.2, 14.9, 2.9, 5.5, 7.0,
3.9, 12.3, 10.6, 22.1, 5.0, 4.1, 21.3, 15.9, 34.5, 8.1,
19.6, 10.8, 13.4, 22.8, 27.6, 6.8, 5.9, 9.0, 7.1, 21.2,
1.0, 14.6, 16.9, 1.0, 6.5, 2.9, 7.1, 14.1, 15.2, 7.8,
9.0, 4.9, 2.1, 9.5, 5.6, 11.1, 7.7, 18.3, 3.8, 11.0,
4.2, 12.5, 8.4, 3.2, 4.0, 3.8, 2.0, 24.7, 24.6, 3.4,
4.3, 3.2, 7.6, 8.3, 14.5, 8.3, 8.4, 14.0, 1.0, 9.0]
# This is the desired mean of the distribution.
mean = 11
# Initial guess for the shape parameter.
k0 = 3
sol = fsolve(first_order_eq, k0, args=(mean, observations),
xtol=1e-11)
k_opt = sol[0]
theta_opt = mean / k_opt
print(f"k_opt: {k_opt:9.7f}")
print(f"theta_opt: {theta_opt:9.7f}")
输出:
k_opt: 1.9712604
theta_opt: 5.5801861