python - 查找 k 最近邻图的连通分量的数量？

Question

是否有一种使用预先计算的 KDTree 查找连接组件数量的优雅方法？现在使用呼吸优先搜索算法找到连接的组件，其邻接矩阵由 k 最近邻的 KDTree 给出，但是有更好的可能性吗？

import collections
import numpy as np
from sklearn import neighbors

N = 100
N_k = 8
ra = np.random.random
X0,X1 = ra(N),ra(N)
X0[0:N//2]+= 2
X1[0:N//2]+= 2


X = np.array([X0,X1]).T
tree = neighbors.KDTree(X)              
dist, adj = tree.query(X, k = N_k+1)
dist = dist[:,1::]
adj  =  adj[:,1::]

print("Inside of find_components_lifo")
print("N = %d/ N_k = %d"%(N,N_k))
labels = np.zeros(N, dtype = np.int) - 1
n = 0
steps = 0 
remains = (labels == -1)
while n < N:
    i = np.arange(0,N,1)[remains][np.random.randint(0,N - n)]
    # This is important for directed graphs
    labels[i] = i
    lifo = collections.deque([i])
    while lifo:
        ele = lifo.pop()
        for k in adj[ele,:]:
            if labels[k] == -1:
                labels[k] = labels[i]
                lifo.append(k)
            elif labels[k] != labels[i]:
                labels[labels == labels[i]] = labels[k]
    remains = (labels == -1)
    n = N - len(np.nonzero(remains)[0])
unique = np.unique(labels)
labels_ = np.zeros(N, dtype = np.int) - 1
for i, label in enumerate(unique):
    choice = (labels == label)
    N_cl = len(np.nonzero(choice)[0])
    print("We found a cluster with N = %d"%N_cl)
    labels_[choice] = i

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fixticks(ax)
plt.show()
colors_ = np.array(colors)
for i in range(N):
    for j in range(N_k):
        ax.plot([X0[i],X0[adj[i,j]]],[X1[i],X1[adj[i,j]]], color = colors_[labels_[i]], alpha = 0.3)
ax.grid(True)
ax.scatter(X0,X1, s = 60, color = "Black")
plt.show()

我

score 2 · Accepted Answer

我认为您可以同时使用 scipyconnected_components和 scikit-learn kneighbors_graph。这会产生你正在寻找的东西吗？

from sklearn import neighbors
from scipy.sparse import csgraph
adj = neighbors.kneighbors_graph(X, N_k)
n_components, labels = csgraph.connected_components(adj)

python - 查找 k 最近邻图的连通分量的数量？

1 回答 1

Related

Reference