2

我有一些x,y数据,我使用scipy.stats.gaussian_kde函数获得了高斯核密度估计器(KDE)。我可以绘制它以显示 MWE 下方显示的等高线密度曲线。

这是 MWE 和结果图。

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Data.
x = [1.81,1.715,1.78,1.613,1.629,1.714,1.62,1.738,1.495,1.669,1.57,1.877,1.385,2.129,2.016,1.606,1.444,2.103,1.397,1.854,1.327,1.377,1.798,1.684,2.186,2.079,1.32,1.452,2.272,1.313,1.762,2.308,2.285,2.328,2.288,2.345,2.237,2.078,2.057,1.505,2.595,2.176,2.501,0.942,2.424,2.49,2.65,1.303,2.43,2.241,0.897,1.731,2.464,1.638,0.867,2.392,3.248,2.608,2.733,0.745,2.715,3.078,2.571,0.771,1.071,2.574,3.343,2.835,2.629,3.421,0.642,2.571,2.698,0.595,2.912,0.563,2.832,2.636,3.149,2.522,0.836,0.894,0.447,1.304,1.132,2.488,3.363,2.961,1.317,2.387,0.036,2.199,0.356,3.036,2.103,2.894,-0.097,0.069,2.688,-0.083,0.653,3.247,3.045,3.197,2.963,2.473,2.571,3.333,3.009,1.281,3.257,3.116,2.673,2.901,2.903,2.634,-0.291,-0.29,0.212]
y = [0.924,0.915,0.914,0.91,0.909,0.905,0.905,0.893,0.886,0.881,0.873,0.873,0.844,0.838,0.83,0.817,0.811,0.809,0.807,0.803,0.802,0.792,0.777,0.774,0.774,0.77,0.748,0.746,0.742,0.734,0.729,0.726,0.722,0.677,0.676,0.672,0.635,0.62,0.62,0.608,0.605,0.587,0.586,0.578,0.571,0.569,0.549,0.544,0.535,0.53,0.529,0.513,0.499,0.497,0.496,0.496,0.49,0.486,0.482,0.476,0.474,0.473,0.471,0.47,0.459,0.444,0.438,0.435,0.428,0.419,0.411,0.4,0.396,0.384,0.378,0.368,0.362,0.362,0.361,0.357,0.347,0.346,0.344,0.33,0.322,0.319,0.318,0.305,0.296,0.296,0.289,0.288,0.288,0.288,0.287,0.286,0.283,0.283,0.278,0.274,0.264,0.259,0.248,0.244,0.241,0.239,0.238,0.237,0.23,0.222,0.221,0.218,0.214,0.212,0.207,0.205,0.196,0.19,0.182]
xmin, xmax = min(x), max(x)
ymin, ymax = min(y), max(y)

# Generate KDE.
x1, y1 = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([x1.ravel(), y1.ravel()])
values = np.vstack([x, y])
kernel = stats.gaussian_kde(values)
kde = np.reshape(kernel(positions).T, x1.shape)

# Make plot.
plt.figure()
CS = plt.contour(x1,y1,kde)
plt.clabel(CS, inline=1, fontsize=10, zorder=6)
plt.show()

在此处输入图像描述

我对此输出的形状感兴趣。我特别需要的是一种方法来获取每条密度曲线的两个最远点x,y的坐标。例如,对于右下方的红色曲线,坐标将围绕:和(标记为黑色圆圈的点)。0.7(x1,y1)=(2.7,0.42)(x2,y2)=(2.9,0.29)

0.7由于存在具有相等密度值的曲线(即:两条值为 的红色曲线,两条值为 的橙色曲线等) ,这更加复杂,0.6因此我需要一种方法来区分这些曲线或省略某些曲线具有给定的值。

我不确定如何解决这个问题,任何帮助或指导将不胜感激。

4

1 回答 1

5

这是我们的解决方案:
检查处理部分...

from __future__ import division
import scipy as sp
from scipy import stats
import pylab as pl

x = [1.81,1.715,1.78,1.613,1.629,1.714,1.62,1.738,1.495,1.669,1.57,1.877,1.385,2.129, \
     2.016,1.606,1.444,2.103,1.397,1.854,1.327,1.377,1.798,1.684,2.186,2.079,1.32, \
     1.452,2.272,1.313,1.762,2.308,2.285,2.328,2.288,2.345,2.237,2.078,2.057,1.505, \
     2.595,2.176,2.501,0.942,2.424,2.49,2.65,1.303,2.43,2.241,0.897,1.731,2.464,1.638, \
     0.867,2.392,3.248,2.608,2.733,0.745,2.715,3.078,2.571,0.771,1.071,2.574,3.343, \
     2.835,2.629,3.421,0.642,2.571,2.698,0.595,2.912,0.563,2.832,2.636,3.149,2.522, \
     0.836,0.894,0.447,1.304,1.132,2.488,3.363,2.961,1.317,2.387,0.036,2.199,0.356, \
     3.036,2.103,2.894,-0.097,0.069,2.688,-0.083,0.653,3.247,3.045,3.197,2.963,2.473, \
     2.571,3.333,3.009,1.281,3.257,3.116,2.673,2.901,2.903,2.634,-0.291,-0.29,0.212]
y = [0.924,0.915,0.914,0.91,0.909,0.905,0.905,0.893,0.886,0.881,0.873,0.873,0.844, \
     0.838,0.83,0.817,0.811,0.809,0.807,0.803,0.802,0.792,0.777,0.774,0.774,0.77,0.748, \
     0.746,0.742,0.734,0.729,0.726,0.722,0.677,0.676,0.672,0.635,0.62,0.62,0.608,0.605, \
     0.587,0.586,0.578,0.571,0.569,0.549,0.544,0.535,0.53,0.529,0.513,0.499,0.497, \
     0.496,0.496,0.49,0.486,0.482,0.476,0.474,0.473,0.471,0.47,0.459,0.444,0.438,0.435, \
     0.428,0.419,0.411,0.4,0.396,0.384,0.378,0.368,0.362,0.362,0.361,0.357,0.347,0.346, \
     0.344,0.33,0.322,0.319,0.318,0.305,0.296,0.296,0.289,0.288,0.288,0.288,0.287, \
     0.286,0.283,0.283,0.278,0.274,0.264,0.259,0.248,0.244,0.241,0.239,0.238,0.237, \
     0.23,0.222,0.221,0.218,0.214,0.212,0.207,0.205,0.196,0.19,0.182]

xmin, xmax = min(x), max(x)
ymin, ymax = min(y), max(y)

# Generate KDE
x1, y1 = sp.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = sp.vstack([x1.ravel(), y1.ravel()])
values = sp.vstack([x, y])
kernel = stats.gaussian_kde(values)
kde = sp.reshape(kernel(positions).T, x1.shape)

# plotting
CS = pl.contour(x1,y1,kde)

# ----------------------------------- our solution ------------------------------------
# processing the distances
for i,clc in enumerate(CS.collections):
    for j,pth in enumerate(clc.get_paths()):
        cts = pth.vertices
        d = sp.spatial.distance.cdist(cts,cts)
        x,y = cts[list(sp.unravel_index(sp.argmax(d),d.shape))].T
        pl.plot(x,y,':o')
        print 'Contour Level %d, Part %d'%(i,j)
# ----------------------------------- our solution ------------------------------------

pl.clabel(CS, inline=1, fontsize=10, zorder=6)
pl.axis('image')                   # don't forget using this to fix aspect ratio to 1,1

pl.show()

在此处输入图像描述

于 2013-11-08T01:12:23.557 回答