这听起来像是一个“加权集团”问题:在一个网络中找到例如 r=5 人,具有最大兼容性/C(5,2) 对权重的最大总和。
谷歌“加权集团”算法 - “集团渗透”→ 3k 点击。
但我会采用 Justin Peel 的方法,因为它是可以理解和可控的
(取 n2 最好的对,从中选出最好的 n3 三元组……调整 n2 n3 ……以轻松权衡运行时间/结果质量。)
5 月 18 日添加,随后是实施的削减。
@Jose,看看什么 nbest[] 序列适合你会很有趣。
#!/usr/bin/env python
""" cliq.py: grow high-weight 2 3 4 5-cliques, taking nbest at each stage
weight ab = dist[a,b] -- a symmetric numpy array, diag << 0
weight abc, abcd ... = sum weight all pairs
C[2] = [ (dist[j,k], (j,k)) ... ] nbest[2] pairs
C[3] = [ (cliqwt(j,k,l), (j,k,l)) ... ] nbest[3] triples
...
run time ~ N * (N + nbest[2] + nbest[3] ...)
keywords: weighted-clique heuristic python
"""
# cf "graph clustering algorithm"
from __future__ import division
import numpy as np
__version__ = "denis 18may 2010"
me = __file__.split('/') [-1]
def cliqdistances( cliq, dist ):
return sorted( [dist[j,k] for j in cliq for k in cliq if j < k], reverse=True )
def maxarray2( a, n ):
""" -> max n [ (a[j,k], (j,k)) ...] j <= k, a symmetric """
jkflat = np.argsort( a, axis=None )[:-2*n:-1]
jks = [np.unravel_index( jk, a.shape ) for jk in jkflat]
return [(a[j,k], (j,k)) for j,k in jks if j <= k] [:n]
def _str( iter, fmt="%.2g" ):
return " ".join( fmt % x for x in iter )
#...............................................................................
def maxweightcliques( dist, nbest, r, verbose=10 ):
def cliqwt( cliq, p ):
return sum( dist[c,p] for c in cliq ) # << 0 if p in c
def growcliqs( cliqs, nbest ):
""" [(cliqweight, n-cliq) ...] -> nbest [(cliqweight, n+1 cliq) ...] """
# heapq the nbest ? here just gen all N * |cliqs|, sort
all = []
dups = set()
for w, c in cliqs:
for p in xrange(N):
# fast gen [sorted c+p ...] with small sorted c ?
cp = c + [p]
cp.sort()
tup = tuple(cp)
if tup in dups: continue
dups.add( tup )
all.append( (w + cliqwt(c, p), cp ))
all.sort( reverse=True )
if verbose:
print "growcliqs: %s" % _str( w for w,c in all[:verbose] ) ,
print " best: %s" % _str( cliqdistances( all[0][1], dist )[:10])
return all[:nbest]
np.fill_diagonal( dist, -1e10 ) # so cliqwt( c, p in c ) << 0
C = (r+1) * [(0, None)] # [(cliqweight, cliq-tuple) ...]
# C[1] = [(0, (p,)) for p in xrange(N)]
C[2] = [(w, list(pair)) for w, pair in maxarray2( dist, nbest[2] )]
for j in range( 3, r+1 ):
C[j] = growcliqs( C[j-1], nbest[j] )
return C
#...............................................................................
if __name__ == "__main__":
import sys
N = 100
r = 5 # max clique size
nbest = 10
verbose = 0
seed = 1
exec "\n".join( sys.argv[1:] ) # N= ...
np.random.seed(seed)
nbest = [0, 0, N//2] + (r - 2) * [nbest] # ?
print "%s N=%d r=%d nbest=%s" % (me, N, r, nbest)
# random graphs w cluster parameters ?
dist = np.random.exponential( 1, (N,N) )
dist = (dist + dist.T) / 2
for j in range( 0, N, r ):
dist[j:j+r, j:j+r] += 2 # see if we get r in a row
# dist = np.ones( (N,N) )
cliqs = maxweightcliques( dist, nbest, r, verbose )[-1] # [ (wt, cliq) ... ]
print "Clique weight, clique, distances within clique"
print 50 * "-"
for w,c in cliqs:
print "%5.3g %s %s" % (
w, _str( c, fmt="%d" ), _str( cliqdistances( c, dist )[:10]))