我希望能够在不将整个集合扩展到内存的情况下索引幂集的元素(a la itertools)
此外,我希望索引是基数排序的。所以索引 0 应该是空集,索引 2**n - 1 应该是所有元素
到目前为止,我发现的大多数文献都涉及以感应方式生成电源组。它不会让您直接进入任何索引。我对这个索引的动机是为分布式执行分割一个问题,如果远程机器可以直接潜入任何地方而不在集群中共享迭代器引用,这将很有帮助。
编辑:Blckknght 建议了我所追求的解决方案,如下所示
from scipy.misc import comb
def kcombination_to_index(combination):
index = 0
combination = sorted(combination)
for k, ck in enumerate(combination):
index += comb(ck, k+1, exact=True)
return index
def index_to_kcombination(index, k):
result = []
for k in reversed(range(1, k+1)):
n = 0
while comb(n, k, exact=True) <= index:
n +=1
result.append(n-1)
index -= comb(n-1, k, exact=True)
return result
class PowerSet:
def __init__(self, elements):
self.elements = elements
def __len__(self):
return 2 ** len(self.elements)
def __iter__(self):
for i in range(len(self)):
yield self[i]
def __getitem__(self, k):
if not isinstance(k, int):
raise TypeError
#k=0 is empty set,
#k= 1 - 1+n returns subsets of size 1
for subset_size in range(len(self.elements) + 1):
number_subsets = comb(len(self.elements), subset_size, exact=True)
if k >= number_subsets:
k -= number_subsets
else:
break
#we now want the kth element of a possible permutation of subset_size elements
indeces = index_to_kcombination(k, subset_size)
return map(lambda i: self.elements[i], indeces)
if __name__ == "__main__":
print "index of combination [8, 6, 3, 1, 0] is", kcombination_to_index([8, 6, 3, 1, 0])
print "5 combination at position 72 is", index_to_kcombination(72,5)
ps = PowerSet(["a", "b", "c", "d"])
for subset_idx in range(len(ps)):
print ps[subset_idx]