我想计算并绘制给定样本 new_dO18 的累积分布函数 (CDF),然后在同一张图上叠加具有给定平均值和标准差的正态分布的 CDF。我在对 CDF 进行归一化时遇到了问题:x 轴上的值应该介于 0 和 1 之间。有人能指出我哪里出错了吗?我确信这只需要一个简单的修复,但我刚接触 Python。下面附上了我目前为止的步骤。谢谢!
# Use np.histogram to get counts in each bin. See the help page or
# documentation on how to use this function, and what it returns.
# Standardize new_dO18 with a for loop: each value becomes a z-score,
# (value - sample mean) / sample standard deviation.
# The mean and standard deviation do not change during the loop, so they are
# computed once up front instead of once per element.
sample_mean = np.mean(new_dO18)
sample_std = np.std(new_dO18)
norm_newdO18 = []
for element in new_dO18:
    norm_newdO18.append((element - sample_mean) / sample_std)
print('normalized dO18 values, excluding outliers:', norm_newdO18)
print()
# Bin the standardized data. np.histogram returns raw per-bin counts by
# default, which is what the cumulative sum below needs. The old `normed`
# keyword is not passed: it was deprecated and then removed from NumPy, so
# supplying it raises TypeError on current releases.
num_bins = 20
counts, bin_edges = np.histogram(norm_newdO18, bins=num_bins)
# Calculate and plot CDF of sample: running total of the counts, divided by
# the grand total (the last cumulative value) so the curve ends at exactly 1.
cdf = np.cumsum(counts)
scale = 1.0/cdf[-1]
norm_cdf = scale * cdf
# bin_edges has num_bins + 1 entries; each cumulative value is plotted at the
# right-hand edge of its bin.
plt.plot(bin_edges[1:], norm_cdf, label = 'dO18 values')
plt.legend(bbox_to_anchor=(0, 1), loc='upper left', ncol=1)
plt.xlabel('normalized dO18 data')
plt.ylabel('frequency')
# Calculate and overlay the CDF of a normal distribution with sample mean and
# std as parameters.
mu, sigma = np.mean(new_dO18), np.std(new_dO18)
# Draw 1000 random values from N(mu, sigma) to approximate the theoretical
# distribution.
norm_theoretical = np.random.normal(mu, sigma, 1000)
# The sample curve is drawn from standardized values, so the theoretical draws
# are standardized with the same mu and sigma; otherwise the two curves would
# sit on different x-axis scales and could not be compared.
theoretical_z = (norm_theoretical - mu) / sigma
# Calculate and plot CDF of theoretical sample. The removed `normed` keyword is
# not passed; raw counts are the default.
counts1, bin_edges1 = np.histogram(theoretical_z, bins=20)
cdft = np.cumsum(counts1)
scale = 1.0/cdft[-1]
# Scale the theoretical cumulative counts (cdft) so the curve ends at 1, and
# plot them against the theoretical histogram's own bin edges.
norm_cdft = scale * cdft
plt.plot(bin_edges1[1:], norm_cdft, label = 'theoretical values')
plt.legend(bbox_to_anchor=(0, 1), loc='upper left', ncol=1)
plt.show()