python - 将所有分区迭代成k组？

Question

假设我有一个列表 L。如何在 K 组的所有分区上获得迭代器？

示例：L = [ 2,3,5,7,11, 13]，K = 3

3组所有可能分区的列表：

[ [ 2 ], [ 3, 5], [ 7,11,13] ]
[ [ 2,3,5 ], [ 7, 11], [ 13] ]
[ [ 3, 11 ], [ 5, 7], [ 2, 13] ]
[ [ 3 ], [ 11 ], [ 5, 7, 2, 13] ]
etc...

=== 更新 ===

我正在研究一个似乎有效的解决方案，所以我将复制粘贴它

# -*- coding: utf-8 -*-

import itertools 

# return ( list1 - list0 )
def l1_sub_l0( l1, l0 ) :
    """Substract two lists"""
    #
    copy_l1 = list( l1 )
    copy_l0 = list( l0 )

    #
    for xx in l0 :
        #
        if copy_l1.count( xx ) > 0 :
            #
            copy_l1.remove( xx )
            copy_l0.remove( xx )

    #
    return [ copy_l1, copy_l0 ]


#
def gen_group_len( n, k ) :
    """Generate all possible group sizes"""

    # avoid doubles
    stop_list = []
    #
    for t in itertools.combinations_with_replacement( xrange( 1, n - 1 ), k - 1 ) :
        #
        last_n = n - sum( t )

        # valid group size
        if last_n  >= 1 :
            res = tuple( sorted( t + ( last_n, ) ) )
            #
            if res not in stop_list :
                yield res
                stop_list.append( res )


# group_len = (1, 1, 3)

def gen( group_len, my_list ) :
    """Generate all possible partitions of all possible group sizes"""

    #
    if len( group_len ) == 1 :
        yield ( tuple( my_list ), )

    #
    else :

        # need for a stop list if 2 groups of same size
        stop_list = []

        #
        for t in itertools.combinations( my_list, group_len[ 0 ] ) :
            #
            reduced_list = l1_sub_l0( my_list, t )[ 0 ]

            #
            for t2 in gen( group_len[ 1: ], reduced_list ) :
                #
                tmp = set( ( t, t2[ 0 ] ) )
                #
                if tmp not in stop_list :
                    yield ( t, ) + t2
                    # avoid doing same thing twice
                    if group_len[ 1 ] == group_len[ 0 ] :
                        stop_list.append( tmp )


#
my_list = [ 3,5,7,11,13 ]
n = len( my_list )
k = 3

#
group_len_list = list( gen_group_len( n, k ) )
print "for %i elements, %i configurations of group sizes" % ( n, len(  group_len_list ) )
print group_len_list

#
for group_len in group_len_list :
    #
    print "group sizes", group_len
    #
    for x in gen( group_len, my_list ) :
        print x
    #
    print "==="

输出：

for 5 elements, 2 configurations of group sizes
[(1, 1, 3), (1, 2, 2)]
group sizes (1, 1, 3)
((3,), (5,), (7, 11, 13))
((3,), (7,), (5, 11, 13))
((3,), (11,), (5, 7, 13))
((3,), (13,), (5, 7, 11))
((5,), (7,), (3, 11, 13))
((5,), (11,), (3, 7, 13))
((5,), (13,), (3, 7, 11))
((7,), (11,), (3, 5, 13))
((7,), (13,), (3, 5, 11))
((11,), (13,), (3, 5, 7))
===
group sizes (1, 2, 2)
((3,), (5, 7), (11, 13))
((3,), (5, 11), (7, 13))
((3,), (5, 13), (7, 11))
((5,), (3, 7), (11, 13))
((5,), (3, 11), (7, 13))
((5,), (3, 13), (7, 11))
((7,), (3, 5), (11, 13))
((7,), (3, 11), (5, 13))
((7,), (3, 13), (5, 11))
((11,), (3, 5), (7, 13))
((11,), (3, 7), (5, 13))
((11,), (3, 13), (5, 7))
((13,), (3, 5), (7, 11))
((13,), (3, 7), (5, 11))
((13,), (3, 11), (5, 7))
===

score 3 · Accepted Answer

这行得通，尽管它可能非常无能（我将它们全部排序以避免重复计算）：

def clusters(l, K):
    if l:
        prev = None
        for t in clusters(l[1:], K):
            tup = sorted(t)
            if tup != prev:
                prev = tup
                for i in xrange(K):
                    yield tup[:i] + [[l[0]] + tup[i],] + tup[i+1:]
    else:
        yield [[] for _ in xrange(K)]

它还返回空簇，因此您可能需要包装它以仅获取非空簇：

def neclusters(l, K):
    for c in clusters(l, K):
        if all(x for x in c): yield c

计数只是为了检查：

def kamongn(n, k):
    res = 1
    for x in xrange(n-k, n):
        res *= x + 1
    for x in xrange(k):
        res /= x + 1
    return res

def Stirling(n, k):
    res = 0
    for j in xrange(k + 1):
        res += (-1)**(k-j) * kamongn(k, j) * j ** n
    for x in xrange(k):
        res /= x + 1
    return res

>>> sum(1 for _ in neclusters([2,3,5,7,11,13], K=3)) == Stirling(len([2,3,5,7,11,13]), k=3)
True

有用！

输出：

>>> clust = neclusters([2,3,5,7,11,13], K=3)
>>> [clust.next() for _ in xrange(5)]
[[[2, 3, 5, 7], [11], [13]], [[3, 5, 7], [2, 11], [13]], [[3, 5, 7], [11], [2, 13]], [[2, 3, 11], [5, 7], [13]], [[3, 11], [2, 5, 7], [13]]]

score 2 · Accepted Answer

这个问题的一个简单的替代视图是将三个集群标签之一分配给每个元素。

import itertools
def neclusters(l, k):
    for labels in itertools.product(range(k), repeat=len(l)):
        partition = [[] for i in range(k)]
        for i, label in enumerate(labels):
            partition[label].append(l[i])
        yield partition

与@val 的答案一样，可以将其包装以删除具有空集群的分区。

score 1 · Accepted Answer

已编辑：正如@moose 所指出的，以下仅确定连续索引位于同一集群中的分区。对所有排列执行此分区将给出所寻求的答案。

Itertools对于这种组合列表非常有用。首先，我们将您的任务视为选择k-1数组中所有不同分割点集的等效问题。这由itertools.combinations解决，它返回组合而不替换给定迭代中的特定大小，并且它返回的值按照它们在原始迭代中的顺序排列。

因此，您的问题可以通过以下方式解决：

import itertools
def neclusters(l, K):
    for splits in itertools.combinations(range(len(l) - 1), K - 1):
        # splits need to be offset by 1, and padded
        splits = [0] + [s + 1 for s in splits] + [None]
        yield [l[s:e] for s, e in zip(splits, splits[1:])]

numpy的split函数旨在使这些类型的分区给定拆分偏移量，因此这是生成 numpy 数组列表的替代方法：

import itertools
def neclusters(l, K):
    for splits in itertools.combinations(range(len(l) - 1), K - 1):
        yield np.split(l, 1 + np.array(splits))

score 1 · Accepted Answer

k使用more_itertools.partitions（注意尾随的“s”）过滤大小的分区：

给定

import itertools as it

import more_itertools as mit


iterable = [2, 3, 5, 7, 11]
k = 3

演示

res = [p for perm in it.permutations(iterable) for p in mit.partitions(perm) if len(p) == k]
len(res)
# 720

res
# [[[2], [3], [5, 7, 11]],
#  [[2], [3, 5], [7, 11]],
#  [[2], [3, 5, 7], [11]],
#  [[2, 3], [5], [7, 11]],
#  [[2, 3], [5, 7], [11]],
#  [[2, 3, 5], [7], [11]],
#  ...
#  [[3], [2], [5, 7, 11]],
#  [[3], [2, 5], [7, 11]],
#  [[3], [2, 5, 7], [11]],
#  [[3, 2], [5], [7, 11]],
#  [[3, 2], [5, 7], [11]],
#  [[3, 2, 5], [7], [11]],
#  [[3], [2], [5, 11, 7]],
#  ...
# ]

此版本提供了置换输入的分区。可以包括重复元素的分区，例如[[3,], [5,], [7, 11, 13]] and [[7, 11, 13]], [3,], [5,]]。

注意：more_itertools是第三方包。通过安装> pip install more_itertools。

score 0 · Accepted Answer

一种相当有效的方法是在每个递归中以第一个元素为中心以强制唯一性，并简单地通过所有增加大小的组合，直到它给出空子集。

def kpartitions(l, k):
  import itertools
  if k == 1: yield [l]; return
  for i in range(1, len(l)-k+1+1):
    s = set(range(1, len(l)))
    for comb in itertools.combinations(s, i-1):
      for t in kpartitions([l[idx] for idx in s - set(comb)], k-1):
        yield [[l[0], *(l[idx] for idx in comb)], *t]
def stirlingsecond(n, k):
  import math
  return sum((-1 if (i & 1 != 0) else 1) * math.comb(k, i)*((k-i)**n)
    for i in range(k+1)) // math.factorial(k)
assert len(list(kpartitions([3,5,7,11,13], 3))) == stirlingsecond(5, 3)
assert len(list(kpartitions([2,3,5,7,11,13], 3))) == stirlingsecond(6, 3)

这是非常有效的，尽管它做了一些额外的工作来查找不在组合中的元素，因为 itertools.combinations 很方便，尽管编写一个组合函数来产生组合和那些不在组合中的元素可能会持续改进时间。

python - 将所有分区迭代成k组？

5 回答 5

Related

Reference