python - 显示特定直方图（不正常）

Question

我正在尝试将具有特定 a 和 b 参数的截断正态分布覆盖在从相同分布生成的样本直方图上。

我如何适应 truncnorm(a,b) 的 pdf？

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import matplotlib.mlab as mlab

from IPython.display import Math, Latex
# for displaying images
from IPython.core.display import Image
# import seaborn
import seaborn as sns
# settings for seaborn plotting style
sns.set()
# settings for seaborn plot sizes
sns.set(rc={'figure.figsize':(5,5)})

tempdist=[]

samples=100

for k in range(1,samples):
    #Probability
    #Storage temp as truncated normal
    #temperature as normal mean 55 with 5F variation
    storagetempfarenht = 57 #55
    storagetempkelvin = (storagetempfarenht + 459.67) * (5.0/9.0)
    highesttemp=storagetempfarenht + 5
    lowesttemp= storagetempfarenht -5
    sigma = ((highesttemp + 459.67) * (5.0/9.0)) - storagetempkelvin
    mu, sigma = storagetempkelvin, sigma
    lower, upper = mu-2*sigma , mu+2*sigma
    a=(lower - mu) / sigma
    b=(upper - mu) / sigma
    temp =stats.truncnorm.rvs(a, b, loc=mu, scale=sigma, size=1)
    mean, var, skew, kurt = stats.truncnorm.stats(a, b, moments='mvsk')

    tempdist.append(temp)

#Theses are the randomly generated values
tempdist=np.array(tempdist)

x = range(250,350)

ax = sns.distplot(tempdist,
                  bins=500,
                  kde=True,
                  color='r',
                  fit=stats.truncnorm,
                  hist_kws={"linewidth": 15,'alpha':1})
ax.set(xlabel='Trunc Normal Distribution', ylabel='Frequency')

score 0 · Accepted Answer

的拟合参数sns.distplot不适用于truncnorm，至少在数据有限的情况下不适用。truncnorm.fit需要一些猜测，并且distplot不知道如何提供它们。

这篇文章解释了如何手动安装truncnorm. 下面的代码仅truncnorm.pdf使用初始参数绘制。要获得拟合参数，您可以使用链接帖子中的代码。

一些备注：

许多 numpy（和 scipy）函数对完整的数组进行操作，并且可以一次生成完整的数组。例如stats.truncnorm.rvs(..., size=N)，生成一个包含 N 个样本的数组。
设置bins=500将distplot创建 500 个直方图箱，当只有 100 个样本时，这并没有真正的帮助。
kde=True绘制估计分布的 pdf；默认这是高斯核的总和；bins 越多，kde 越遵循数据的细节（而不是其一般形式）
设置"linewidth": 15在hist_kws15 像素宽的直方图条周围创建线条。这比条本身宽得多，导致情节看起来很奇怪。最好将线宽设置为 1 左右。
在 Python 中，for k in range(1,samples)运行 sample-1 次。这与 Python 以 0 而不是 1 开始数组索引有关。

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import matplotlib.mlab as mlab
import seaborn as sns
sns.set()
sns.set(rc={'figure.figsize': (5, 5)})

samples = 100

storagetempfarenht = 57  # 55
storagetempkelvin = (storagetempfarenht + 459.67) * (5.0 / 9.0)
highesttemp = storagetempfarenht + 5
lowesttemp = storagetempfarenht - 5
sigma = ((highesttemp + 459.67) * (5.0 / 9.0)) - storagetempkelvin
mu, sigma = storagetempkelvin, sigma
lower, upper = mu - 2 * sigma, mu + 2 * sigma
a = (lower - mu) / sigma
b = (upper - mu) / sigma
temp = stats.truncnorm.rvs(a, b, loc=mu, scale=sigma, size=1)
# mean, var, skew, kurt = stats.truncnorm.stats(a, b, moments='mvsk')

# Theses are the randomly generated values
tempdist = stats.truncnorm.rvs(a, b, loc=mu, scale=sigma, size=samples)

ax = sns.distplot(tempdist,
                  # bins=10,  # 10 bins is the default
                  kde=True,
                  color='r',
                  # fit=stats.truncnorm, # doesn't work for truncnorm
                  hist_kws={"linewidth": 1, 'alpha': 1, 'label': 'Histogram'},
                  kde_kws={"linewidth": 2, 'alpha': 1, 'color': 'dodgerblue', 'label': 'Estimated kde'})
ax.set(xlabel='Trunc Normal Distribution', ylabel='Frequency')
x_left, x_right = ax.get_xlim()
x = np.linspace(x_left, x_right, 500)
y = stats.truncnorm.pdf(x, a, b, loc=mu, scale=sigma)
ax.plot(x, y, color='limegreen', label='Given truncnormal')
# for xi in (lower, upper):   # optionally draw vertical lines at upper and lower
#    ax.axvline(xi, linestyle=':', color='limegreen')
plt.legend()
plt.tight_layout()
plt.show()

python - 显示特定直方图（不正常）

1 回答 1

Related

Reference