python - 估计由一组点生成的图像区域（Alpha 形状？？）

Question

我在显示 2D 图像的示例 ASCII 文件中有一组点。我想估计这些点填充的总面积。该平面内的某些地方没有被任何点填充，因为这些区域已被屏蔽。我猜想估计面积可能是应用凹壳或alpha 形状。我尝试了这种方法来找到一个合适的alpha值，从而估计面积。

from shapely.ops import cascaded_union, polygonize
import shapely.geometry as geometry
from scipy.spatial import Delaunay
import numpy as np
import pylab as pl
from descartes import PolygonPatch
from matplotlib.collections import LineCollection
def plot_polygon(polygon):
    fig = pl.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    margin = .3
    x_min, y_min, x_max, y_max = polygon.bounds
    ax.set_xlim([x_min-margin, x_max+margin])
    ax.set_ylim([y_min-margin, y_max+margin])
    patch = PolygonPatch(polygon, fc='#999999',
                         ec='#000000', fill=True,
                         zorder=-1)
    ax.add_patch(patch)
    return fig
def alpha_shape(points, alpha):
    if len(points) < 4:
        # When you have a triangle, there is no sense
        # in computing an alpha shape.
        return geometry.MultiPoint(list(points)).convex_hull
    def add_edge(edges, edge_points, coords, i, j):
        """
        Add a line between the i-th and j-th points,
        if not in the list already
        """
        if (i, j) in edges or (j, i) in edges:
           # already added
           return
        edges.add( (i, j) )
        edge_points.append(coords[ [i, j] ])
    coords = np.array([point.coords[0]
                       for point in points])
    tri = Delaunay(coords)
    edges = set()
    edge_points = []
    # loop over triangles:
    # ia, ib, ic = indices of corner points of the
    # triangle
    for ia, ib, ic in tri.vertices:
        pa = coords[ia]
        pb = coords[ib]
        pc = coords[ic]
        # Lengths of sides of triangle
        a = np.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
        b = np.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
        c = np.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
        # Semiperimeter of triangle
        s = (a + b + c)/2.0
        # Area of triangle by Heron's formula
        area = np.sqrt(s*(s-a)*(s-b)*(s-c))
        circum_r = a*b*c/(4.0*area)
        # Here's the radius filter.
        #print circum_r
        if circum_r < 1.0/alpha:
                add_edge(edges, edge_points, coords, ia, ib)
                add_edge(edges, edge_points, coords, ib, ic)
                add_edge(edges, edge_points, coords, ic, ia)
        m = geometry.MultiLineString(edge_points)
        triangles = list(polygonize(m))
        return cascaded_union(triangles), edge_points
points=[]
with open("test.asc") as f:
     for line in f:
         coords=map(float,line.split(" "))
         points.append(geometry.shape(geometry.Point(coords[0],coords[1])))
         print geometry.Point(coords[0],coords[1])
x = [p.x for p in points]
y = [p.y for p in points]
pl.figure(figsize=(10,10))
point_collection = geometry.MultiPoint(list(points))
point_collection.envelope
convex_hull_polygon = point_collection.convex_hull
_ = plot_polygon(convex_hull_polygon)
_ = pl.plot(x,y,'o', color='#f16824')
concave_hull, edge_points = alpha_shape(points, alpha=0.001)
lines = LineCollection(edge_points)
_ = plot_polygon(concave_hull)       
_ = pl.plot(x,y,'o', color='#f16824')

我得到了这个结果，但我希望这种方法可以检测到中间的洞。

更新
这是我的真实数据的样子：

我的问题是估计上述形状面积的最佳方法是什么？我无法弄清楚这段代码不能正常工作出了什么问题？！！任何帮助将不胜感激。

score 10 · Accepted Answer

好的，这就是这个想法。Delaunay 三角剖分将生成不分青红皂白的大三角形。这也会有问题，因为只会生成三角形。

因此，我们将生成您可能称之为“模糊 Delaunay 三角剖分”的内容。我们将把所有的点放到一个 kd-tree 中，并且对于每个点p，查看它k最近的邻居。kd-tree 使这个速度更快。

对于这些k邻居中的每一个，找到到焦点的距离p。使用此距离生成权重。我们希望附近的点比更远的点更受青睐，因此exp(-alpha*dist)这里适合使用指数函数。使用加权距离来构建描述绘制每个点的概率的概率密度函数。

现在，从那个分布中抽取很多次。将经常选择附近的点，而不太经常选择较远的点。对于绘制的点，请记下为焦点绘制了多少次。结果是一个加权图，图中的每条边都连接附近的点，并根据选择对的频率加权。

现在，从图中剔除权重太小的所有边。这些是可能没有连接的点。结果如下所示：

现在，让我们将所有剩余的边缘都放入shapely中。然后我们可以通过缓冲它们将边缘转换为非常小的多边形。像这样：

用覆盖整个区域的大多边形来区分多边形将产生用于三角剖分的多边形。可能还要等一下。结果如下所示：

最后，剔除所有过大的多边形：

#!/usr/bin/env python

import numpy as np
import matplotlib.pyplot as plt
import random
import scipy
import scipy.spatial
import networkx as nx
import shapely
import shapely.geometry
import matplotlib

dat     = np.loadtxt('test.asc')
xycoors = dat[:,0:2]
xcoors  = xycoors[:,0] #Convenience alias
ycoors  = xycoors[:,1] #Convenience alias
npts    = len(dat[:,0]) #Number of points

dist = scipy.spatial.distance.euclidean

def GetGraph(xycoors, alpha=0.0035):
  kdt  = scipy.spatial.KDTree(xycoors)         #Build kd-tree for quick neighbor lookups
  G    = nx.Graph()
  npts = np.max(xycoors.shape)
  for x in range(npts):
    G.add_node(x)
    dist, idx = kdt.query(xycoors[x,:], k=10) #Get distances to neighbours, excluding the cenral point
    dist      = dist[1:]                      #Drop central point
    idx       = idx[1:]                       #Drop central point
    pq        = np.exp(-alpha*dist)           #Exponential weighting of nearby points
    pq        = pq/np.sum(pq)                 #Convert to a PDF
    choices   = np.random.choice(idx, p=pq, size=50) #Choose neighbors based on PDF
    for c in choices:                         #Insert neighbors into graph
      if G.has_edge(x, c):                    #Already seen neighbor
        G[x][c]['weight'] += 1                #Strengthen connection
      else:
        G.add_edge(x, c, weight=1)            #New neighbor; build connection
  return G

def PruneGraph(G,cutoff):
  newg      = G.copy()
  bad_edges = set()
  for x in newg:
    for k,v in newg[x].items():
      if v['weight']<cutoff:
        bad_edges.add((x,k))
  for b in bad_edges:
    try:
      newg.remove_edge(*b)
    except nx.exception.NetworkXError:
      pass
  return newg


def PlotGraph(xycoors,G,cutoff=6):
  xcoors = xycoors[:,0]
  ycoors = xycoors[:,1]
  G = PruneGraph(G,cutoff)
  plt.plot(xcoors, ycoors, "o")
  for x in range(npts):
    for k,v in G[x].items():
      plt.plot((xcoors[x],xcoors[k]),(ycoors[x],ycoors[k]), 'k-', lw=1)
  plt.show()


def GetPolys(xycoors,G):
  #Get lines connecting all points in the graph
  xcoors = xycoors[:,0]
  ycoors = xycoors[:,1]
  lines = []
  for x in range(npts):
    for k,v in G[x].items():
      lines.append(((xcoors[x],ycoors[x]),(xcoors[k],ycoors[k])))
  #Get bounds of region
  xmin  = np.min(xycoors[:,0])
  xmax  = np.max(xycoors[:,0])
  ymin  = np.min(xycoors[:,1])
  ymax  = np.max(xycoors[:,1])
  mls   = shapely.geometry.MultiLineString(lines)   #Bundle the lines
  mlsb  = mls.buffer(2)                             #Turn lines into narrow polygons
  bbox  = shapely.geometry.box(xmin,ymin,xmax,ymax) #Generate background polygon
  polys = bbox.difference(mlsb)                     #Subtract to generate polygons
  return polys

def PlotPolys(polys,area_cutoff):
  fig, ax = plt.subplots(figsize=(8, 8))
  for polygon in polys:
    if polygon.area<area_cutoff:
      mpl_poly = matplotlib.patches.Polygon(np.array(polygon.exterior), alpha=0.4, facecolor=np.random.rand(3,1))
      ax.add_patch(mpl_poly)
  ax.autoscale()
  fig.show()


#Functional stuff starts here

G = GetGraph(xycoors, alpha=0.0035)

#Choose a value that rips off an appropriate amount of the left side of this histogram
weights = sorted([v['weight'] for x in G for k,v in G[x].items()])
plt.hist(weights, bins=20);plt.show() 

PlotGraph(xycoors,G,cutoff=6)       #Plot the graph to ensure our cut-offs were okay. May take a while
prunedg = PruneGraph(G,cutoff=6)    #Prune the graph
polys   = GetPolys(xycoors,prunedg) #Get polygons from graph

areas = sorted(p.area for p in polys)
plt.plot(areas)
plt.hist(areas,bins=20);plt.show()

area_cutoff = 150000
PlotPolys(polys,area_cutoff=area_cutoff)
good_polys = ([p for p in polys if p.area<area_cutoff])
total_area = sum([p.area for p in good_polys])

score 9 · Accepted Answer

这里有一个想法：使用k-means clustering。

您可以在 Python 中完成此操作，如下所示：

from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

dat     = np.loadtxt('test.asc')
xycoors = dat[:,0:2]

fit = KMeans(n_clusters=2).fit(xycoors)

plt.scatter(dat[:,0],dat[:,1], c=fit.labels_)
plt.axes().set_aspect('equal', 'datalim')
plt.gray()
plt.show()

使用您的数据，这将产生以下结果：

现在，您可以获取顶部簇和底部簇的凸包并分别计算每个区域的面积。然后添加这些区域就成为了它们联合区域的估计量，但是，巧妙地避免了中间的洞。

要微调您的结果，您可以使用聚类数和算法的不同启动数（该算法是随机的，通常运行不止一次）。

例如，你问过两个集群是否总是会在中间留下洞。我已经使用以下代码进行了实验。我生成点的均匀分布，然后切出一个随机大小和定向的椭圆来模拟一个洞。

#!/usr/bin/env python3

import sklearn
import sklearn.cluster
import numpy as np
import matplotlib.pyplot as plt

PWIDTH  = 6
PHEIGHT = 6

def GetPoints(num):
  return np.random.rand(num,2)*300-150 #Centered about zero

def MakeHole(pts): #Chop out a randomly orientated and sized ellipse
  a = np.random.uniform(10,150)    #Semi-major axis
  b = np.random.uniform(10,150)    #Semi-minor axis
  h = np.random.uniform(-150,150)  #X-center
  k = np.random.uniform(-150,150)  #Y-center
  A = np.random.uniform(0,2*np.pi) #Angle of rotation
  surviving_points = []
  for pt in range(pts.shape[0]):
    x = pts[pt,0]
    y = pts[pt,1]
    if ((x-h)*np.cos(A)+(y-k)*np.sin(A))**2/a/a+((x-h)*np.sin(A)-(y-k)*np.cos(A))**2/b/b>1:
      surviving_points.append(pt)
  return pts[surviving_points,:]

def ShowManyClusters(pts,fitter,clusters,title):
  colors  = np.array([x for x in 'bgrcmykbgrcmykbgrcmykbgrcmyk'])
  fig,axs = plt.subplots(PWIDTH,PHEIGHT)
  axs     = axs.ravel()
  for i in range(PWIDTH*PHEIGHT):
    lbls = fitter(pts[i],clusters)
    axs[i].scatter(pts[i][:,0],pts[i][:,1], c=colors[lbls])
    axs[i].get_xaxis().set_ticks([])
    axs[i].get_yaxis().set_ticks([])
  plt.suptitle(title)
  #plt.show()
  plt.savefig('/z/'+title+'.png')

fitters = {
  'SpectralClustering':  lambda x,clusters: sklearn.cluster.SpectralClustering(n_clusters=clusters,affinity='nearest_neighbors').fit(x).labels_,
  'KMeans':              lambda x,clusters: sklearn.cluster.KMeans(n_clusters=clusters).fit(x).labels_,
  'AffinityPropagation': lambda x,clusters: sklearn.cluster.AffinityPropagation().fit(x).labels_,
}

np.random.seed(1)
pts = []
for i in range(PWIDTH*PHEIGHT):
  temp = GetPoints(300)
  temp = MakeHole(temp)
  pts.append(temp)

for name,fitter in fitters.items():
  for clusters in [2,3]:
    np.random.seed(1)
    ShowManyClusters(pts,fitter,clusters,"{0}: {1} clusters".format(name,clusters))

考虑 K-Means 的结果：

至少在我看来，当“洞”将数据分成两个单独的 blob 时，似乎使用两个集群的性能最差。（在这种情况下，当椭圆的方向与包含样本点的矩形区域的两条边缘重叠时会发生这种情况。）使用三个聚类可以解决大部分这些困难。

您还会注意到 K-means 在第 1 列第 3 行以及第 3 列第 4 行产生了一些违反直觉的结果。在这里回顾 sklearn 的集群方法可以看到以下比较图像：

由此看来，SpectralClustering 产生的结果似乎与我们想要的一致。对上述相同数据进行尝试可以解决上述问题（请参阅第 1 列，第 3 行和第 3 列，第 4 行）。

上述表明，对于大多数此类情况，具有三个集群的光谱聚类应该是足够的。

score 3 · Accepted Answer

尽管您似乎打算做一个凹形，但这里有一条非常快的替代路线，我认为这会给您一个非常稳定的读数：

创建一个作为参数的函数（int radiusOfInfluence）。在函数内部运行一个以它为半径的体素过滤器。然后只需将该圆的面积 (pi*AOI^2) 乘以云中剩余点的数量。这应该给你一个相对稳健的面积估计，并且对孔和奇怪的边缘非常有弹性。

需要考虑的一些事项：

- 这将为您提供一个正的区域超调，因为超出了一个半径的边缘。对此进行调整的修改可以是运行统计异常值去除过滤器（在反向模式下）以获取统计边缘点。然后可以假设每个边缘点的大约一半位于形状之外，在乘以面积之前从总计数中减去找到的点数的一半。

- 影响半径在很大程度上决定了此功能的孔洞检测，因为较大的半径将允许单点覆盖更大的区域，而且通过调整 stat 异常值过滤器上的 std 截止值，您可以更积极地检测内部孔洞并以这种方式调整您的区域.

它确实引出了你追求什么的问题，因为这更像是一个镜头准确性/镜头分组类型评估，假设有一组合理分布的样本。您的方法有点假设您的外边缘点是可能的绝对限制（根据情况，这可能是一个公平的假设）

编辑 - - - - - - - - - - - -

我没有时间写示例代码，但我可以进一步解释以帮助理解。

其核心是体素过滤器。非常简单，它在 x,y 坐标中设置一个种子点，然后在整个空间上创建一个网格，该网格在用户指定的过滤器半径的两个轴上都有单位（网格间距）。在每个网格框内，它将所有点平均到一个点。这对于这个概念非常重要，因为它几乎完全消除了重叠问题。

第二部分（逆统计异常值去除）只是收紧边缘合身的一点聪明之处。基本上，统计异常值是通过查看每个点到其 (k) 最近邻居的距离来消除噪声的。在为每个点生成到 k 个最近邻居的平均距离后，它会设置一个直方图，并且用户定义的参数充当保留或删除点的二进制阈值。当反转并设置为合理的截止值（~0.75 std 应该起作用）时，它将删除对象主体中的所有点（即只留下边缘点）。这很重要的原因是，从技术上讲，这些点超出了对象的边界 1 个半径。

请记住，归根结底，这只是给你一个数字。至于压力测试，我建议创建已知区域的人为点云，或者创建一个图形输出，显示你在哪里放置圆和半圆（如果你喜欢的话，朝向对象的内部）。

您需要转动以改进此方法的旋钮是：体素过滤器半径、每个点的影响区域（实际上可以与体素过滤器半径分开控制，尽管它们应该保持非常接近）、标准截止。

希望这有助于澄清，干杯！

score 2 · Accepted Answer

编辑：

我注意到您有自己的代码来计算 alpha 形状，并且 Delaunay 三角形的面积就在那里，因此计算形状的面积更加容易......

如果要将三角形添加到 alpha 形状的多边形中，只需添加三角形的区域。

如果要检测孔...添加辅助阈值以避免添加面积大于阈值的三角形。对于此示例，max_area = 99999 的值将删除该孔。

唯一的问题是您创建图形输出的方式，因为您看不到孔。

def alpha_shape(points, alpha, max_area):
    if len(points) < 4:
        # When you have a triangle, there is no sense
        # in computing an alpha shape.
        return geometry.MultiPoint(list(points)).convex_hull , 0
    def add_edge(edges, edge_points, coords, i, j):
        """
        Add a line between the i-th and j-th points,
        if not in the list already
        """
        if (i, j) in edges or (j, i) in edges:
           # already added
           return
        edges.add( (i, j) )
        edge_points.append(coords[ [i, j] ])
    coords = np.array([point.coords[0]
                       for point in points])
    tri = Delaunay(coords)
    total_area = 0
    edges = set()
    edge_points = []
    # loop over triangles:
    # ia, ib, ic = indices of corner points of the
    # triangle
    for ia, ib, ic in tri.vertices:
        pa = coords[ia]
        pb = coords[ib]
        pc = coords[ic]
        # Lengths of sides of triangle
        a = np.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
        b = np.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
        c = np.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
        # Semiperimeter of triangle
        s = (a + b + c)/2.0
        # Area of triangle by Heron's formula
        area = np.sqrt(s*(s-a)*(s-b)*(s-c))
        circum_r = a*b*c/(4.0*area)
        # Here's the radius filter.
        # print("radius", circum_r)
        if circum_r < 1.0/alpha and area < max_area:
                add_edge(edges, edge_points, coords, ia, ib)
                add_edge(edges, edge_points, coords, ib, ic)
                add_edge(edges, edge_points, coords, ic, ia)
                total_area += area

    m = geometry.MultiLineString(edge_points)
    triangles = list(polygonize(m))
    return cascaded_union(triangles), edge_points, total_area

这

老答案：

要计算不规则简单多边形的面积，可以使用Shoelace 公式和边界的 CCW 坐标作为输入。

如果您想检测云内部的孔洞，则必须移除外接半径大于第二阈值的德劳内三角形。理想情况是：计算 Delaunay 三角剖分并使用您当前的 alpha 形状进行过滤。然后，计算每个三角形的外接半径，并删除那些外接半径远大于平均外接半径的三角形。

要计算带孔的不规则多边形的面积，请对每个孔边界使用鞋带公式。以CCW（正）顺序输入外部边界以获得面积。然后以CW（负）顺序输入每个孔的边界，以获得面积（负）值。

python - 估计由一组点生成的图像区域（Alpha 形状？？）

4 回答 4

Related

Reference