假设我有一个列表 L。如何获得一个迭代器,用来遍历把 L 划分为 K 组的所有分区?

示例:L = [ 2,3,5,7,11, 13],K = 3


[ [ 2 ], [ 3, 5], [ 7,11,13] ]
[ [ 2,3,5 ], [ 7, 11], [ 13] ]
[ [ 3, 11 ], [ 5, 7], [ 2, 13] ]
[ [ 3 ], [ 11 ], [ 5, 7, 2, 13] ]

=== 更新 ===


# -*- coding: utf-8 -*-

import itertools 

# return ( list1 - list0 )
def l1_sub_l0( l1, l0 ) :
    """Cancel the elements shared by two sequences, respecting multiplicity.

    Walks l0 in order; every item that is still present in the working copy
    of l1 is removed once from both working copies.  The arguments themselves
    are never modified.

    Returns a two-item list [ leftover_of_l1, leftover_of_l0 ].
    """
    remaining_l1 = list( l1 )
    remaining_l0 = list( l0 )

    for item in l0 :
        if item not in remaining_l1 :
            continue
        remaining_l1.remove( item )
        remaining_l0.remove( item )

    return [ remaining_l1, remaining_l0 ]

def gen_group_len( n, k ) :
    """Generate every way to choose k positive group sizes that sum to n.

    n -- total number of elements to distribute.
    k -- number of groups; must be at least 1.

    Yields each size configuration exactly once, as a tuple sorted in
    ascending order, e.g. (1, 1, 3) and (1, 2, 2) for n = 5, k = 3.
    Nothing is yielded when n < k.
    """
    # Configurations already produced; a set gives constant-time lookups.
    seen = set()

    # The remaining k - 1 groups need at least one element each, so no single
    # size can exceed n - k + 1.  Using this bound also covers n == k == 2.
    max_size = n - k + 1

    # range() behaves the same on Python 2 and Python 3.
    for head in itertools.combinations_with_replacement( range( 1, max_size + 1 ), k - 1 ) :
        last_n = n - sum( head )

        # the final group must not be empty
        if last_n < 1 :
            continue

        # sorting makes permutations of the same sizes compare equal
        res = tuple( sorted( head + ( last_n, ) ) )
        if res not in seen :
            seen.add( res )
            yield res

# group_len = (1, 1, 3)

def gen( group_len, my_list ) :
    """Generate the partitions of my_list whose groups have the given sizes.

    group_len -- sequence of group sizes, one per group, normally summing to
                 len( my_list ).  The last group always receives whatever
                 elements are left over.
    my_list   -- the elements to distribute.  Elements are told apart by
                 their position, so repeated values count as distinct.

    Yields tuples of tuples such as ((3,), (5, 7), (11, 13)); elements keep
    their original relative order inside every group.

    When two neighbouring groups have the same size, swapping them describes
    the same partition.  Only the arrangement whose earlier group starts with
    the earlier-positioned element is produced, which stays correct for runs
    of three or more equal sizes.  Equal sizes that are not adjacent in
    group_len are not de-duplicated, so pass the sizes sorted.
    """
    items = list( my_list )

    def _split( sizes, positions, prev_size, prev_key ) :
        # Yield groupings of the element positions; prev_size / prev_key
        # describe the group chosen just before (size and first position).
        size = sizes[ 0 ]

        if len( sizes ) == 1 :
            # The last group takes every remaining position.  Drop it when it
            # merely mirrors the previous group of the same size.
            mirrored = ( size == prev_size
                         and len( positions ) == prev_size
                         and positions[ :1 ] < prev_key )
            if not mirrored :
                yield ( tuple( items[ i ] for i in positions ), )
            return

        for chosen in itertools.combinations( positions, size ) :
            # canonical order between neighbouring groups of equal size
            if size == prev_size and chosen[ :1 ] < prev_key :
                continue

            taken = set( chosen )
            rest = tuple( i for i in positions if i not in taken )
            group = tuple( items[ i ] for i in chosen )

            for tail in _split( sizes[ 1: ], rest, size, chosen[ :1 ] ) :
                yield ( group, ) + tail

    all_positions = tuple( range( len( items ) ) )
    for partition in _split( tuple( group_len ), all_positions, None, () ) :
        yield partition

# Demo: list every partition of my_list into k non-empty groups, one size
# configuration at a time.
my_list = [ 3,5,7,11,13 ]
n = len( my_list )
k = 3

group_len_list = list( gen_group_len( n, k ) )

# Every print call below receives exactly one value, so the emitted text is
# the same whether print is a statement (Python 2) or a function (Python 3).
print( "for %i elements, %i configurations of group sizes" % ( n, len( group_len_list ) ) )
print( group_len_list )

for group_len in group_len_list :
    print( "group sizes %s" % ( group_len, ) )
    for x in gen( group_len, my_list ) :
        print( x )
    print( "===" )


for 5 elements, 2 configurations of group sizes
[(1, 1, 3), (1, 2, 2)]
group sizes (1, 1, 3)
((3,), (5,), (7, 11, 13))
((3,), (7,), (5, 11, 13))
((3,), (11,), (5, 7, 13))
((3,), (13,), (5, 7, 11))
((5,), (7,), (3, 11, 13))
((5,), (11,), (3, 7, 13))
((5,), (13,), (3, 7, 11))
((7,), (11,), (3, 5, 13))
((7,), (13,), (3, 5, 11))
((11,), (13,), (3, 5, 7))
group sizes (1, 2, 2)
((3,), (5, 7), (11, 13))
((3,), (5, 11), (7, 13))
((3,), (5, 13), (7, 11))
((5,), (3, 7), (11, 13))
((5,), (3, 11), (7, 13))
((5,), (3, 13), (7, 11))
((7,), (3, 5), (11, 13))
((7,), (3, 11), (5, 13))
((7,), (3, 13), (5, 11))
((11,), (3, 5), (7, 13))
((11,), (3, 7), (5, 13))
((11,), (3, 13), (5, 7))
((13,), (3, 5), (7, 11))
((13,), (3, 7), (5, 11))
((13,), (3, 11), (5, 7))

def clusters(l, K):
    """Yield arrangements of the items of ``l`` into ``K`` possibly empty groups.

    ``l`` must support indexing and slicing (e.g. a list), and its items must
    be mutually orderable because the groups are sorted while recursing.

    Each result is a list of ``K`` lists.  The first item is added in turn to
    every group of each arrangement of the remaining items; consecutive
    arrangements of the remaining items that sort to the same list are
    expanded only once, which avoids repeating a grouping in a different
    group order.  Arrangements with empty groups are included.
    """
    if l:
        prev = None
        for t in clusters(l[1:], K):
            tup = sorted(t)
            # Skip a tail arrangement equal (after sorting) to the previous one.
            if tup != prev:
                prev = tup
                for i in range(K):
                    yield tup[:i] + [[l[0]] + tup[i]] + tup[i + 1:]
    else:
        # Base case: nothing left to place, so every group is empty.
        yield [[] for _ in range(K)]


def neclusters(l, K):
    """Yield only those results of ``clusters(l, K)`` in which no group is empty."""
    for candidate in clusters(l, K):
        # An empty list is falsy, so all() fails as soon as one group is empty.
        if not all(candidate):
            continue
        yield candidate


def kamongn(n, k):
    """Return the binomial coefficient "k among n" as an exact integer.

    Computed as (n - k + 1) * ... * n divided by 1 * 2 * ... * k.
    """
    res = 1
    for x in range(n - k, n):
        res *= x + 1
    for x in range(k):
        # Floor division keeps the result an int on Python 3 as well; each
        # step is exact because a product of k consecutive integers is
        # divisible by every factorial up to k!.
        res //= x + 1
    return res

def Stirling(n, k):
    """Return the Stirling number of the second kind S(n, k).

    Evaluates the alternating sum of (-1)**(k - j) * C(k, j) * j**n for
    j = 0..k and divides the total by k!.
    """
    res = 0
    for j in range(k + 1):
        res += (-1) ** (k - j) * kamongn(k, j) * j ** n
    for x in range(k):
        # Floor division keeps the result integral on Python 3; the sum is a
        # multiple of k!, so dividing by 1, 2, ..., k in turn loses nothing.
        res //= x + 1
    return res

>>> sum(1 for _ in neclusters([2,3,5,7,11,13], K=3)) == Stirling(len([2,3,5,7,11,13]), k=3)

有用 !


>>> clust = neclusters([2,3,5,7,11,13], K=3)
>>> [clust.next() for _ in xrange(5)]
[[[2, 3, 5, 7], [11], [13]], [[3, 5, 7], [2, 11], [13]], [[3, 5, 7], [11], [2, 13]], [[2, 3, 11], [5, 7], [13]], [[3, 11], [2, 5, 7], [13]]]
import itertools
def neclusters(l, k):
    """Yield every assignment of the items of ``l`` to ``k`` labelled clusters.

    Each result is a list of ``k`` lists; the items keep their original
    relative order inside a cluster.  All ``k ** len(l)`` labelings are
    produced, so results may contain empty clusters and the same grouping
    appears once per ordering of its clusters.
    """
    for labels in itertools.product(range(k), repeat=len(l)):
        partition = [[] for _ in range(k)]
        # labels[i] is the index of the cluster that receives l[i].
        for label, item in zip(labels, l):
            partition[label].append(item)
        yield partition

与 @val 的答案一样,可以再包装一层,过滤掉含有空集群的分区。

已编辑:正如 @moose 所指出的,下面的方法只能得到每个集群都由连续索引的元素组成的分区。对输入的所有排列分别执行这种分区,即可得到所需的答案。



import itertools
def neclusters(l, K):
    """Yield every way to cut ``l`` into ``K`` non-empty contiguous slices.

    Each result is a list of ``K`` slices of ``l`` taken in order; only
    groupings of neighbouring items are produced.
    """
    gap_count = len(l) - 1
    for cut_points in itertools.combinations(range(gap_count), K - 1):
        # A cut after index c means the next slice starts at c + 1.
        starts = [0]
        starts.extend(c + 1 for c in cut_points)
        # Every slice ends where the next one starts; the last is open-ended.
        ends = starts[1:] + [None]
        yield [l[begin:end] for begin, end in zip(starts, ends)]

numpy 的 split 函数正是为按给定的拆分偏移量进行这类分区而设计的,因此下面是一种生成 numpy 数组列表的替代方法:

import itertools
def neclusters(l, K):
    """Yield every way to cut ``l`` into ``K`` non-empty contiguous pieces.

    Each result is the list of NumPy arrays returned by ``np.split``; only
    groupings of neighbouring items are produced.
    """
    # Imported here because the name ``np`` is not bound by the visible imports.
    import numpy as np

    data = np.asarray(l)
    for splits in itertools.combinations(range(len(data) - 1), K - 1):
        # A cut after index s starts the next piece at s + 1.  dtype=int keeps
        # the offsets integral even when ``splits`` is empty (K == 1).
        yield np.split(data, np.array(splits, dtype=int) + 1)
import itertools as it

import more_itertools as mit

iterable = [2, 3, 5, 7, 11]
k = 3

# Split every ordering of the input into contiguous parts and keep only the
# splits that consist of exactly k parts.
res = [
    p
    for p in it.chain.from_iterable(map(mit.partitions, it.permutations(iterable)))
    if len(p) == k
]
# 720

# [[[2], [3], [5, 7, 11]],
#  [[2], [3, 5], [7, 11]],
#  [[2], [3, 5, 7], [11]],
#  [[2, 3], [5], [7, 11]],
#  [[2, 3], [5, 7], [11]],
#  [[2, 3, 5], [7], [11]],
#  ...
#  [[3], [2], [5, 7, 11]],
#  [[3], [2, 5], [7, 11]],
#  [[3], [2, 5, 7], [11]],
#  [[3, 2], [5], [7, 11]],
#  [[3, 2], [5, 7], [11]],
#  [[3, 2, 5], [7], [11]],
#  [[3], [2], [5, 11, 7]],
#  ...
# ]

此版本给出的是输入的各个排列的分区,因此结果中可能包含仅各组顺序不同的重复分区,例如 [[3,], [5,], [7, 11, 13]] 和 [[7, 11, 13], [3,], [5,]]。

注意:more_itertools 是第三方包,可通过 `pip install more_itertools` 安装。

def kpartitions(l, k):
  """Yield every partition of the list ``l`` into ``k`` non-empty blocks.

  Each partition is a list of ``k`` lists.  The first block always contains
  ``l[0]``, which is what prevents the same partition from being produced
  again with its blocks in a different order.  Items keep their original
  relative order inside every block, and blocks appear in the order of their
  first items.

  For ``k == 1`` the single block is ``l`` itself (the same object, also when
  ``l`` is empty).  For ``k == 0`` the only partition is the empty one, and
  only of an empty list; a negative ``k`` yields nothing.
  """
  import itertools
  if k < 1:
    # Without this guard the recursion below would never reach k == 1.
    if k == 0 and not l:
      yield []
    return
  if k == 1:
    yield [l]
    return
  # Indices of every item except l[0]; a range iterates in a fixed ascending
  # order, so the output order does not depend on set iteration order.
  others = range(1, len(l))
  # The first block takes l[0] plus ``extra`` more items, leaving at least
  # k - 1 items for the remaining k - 1 blocks.
  for extra in range(len(l) - k + 1):
    for comb in itertools.combinations(others, extra):
      picked = set(comb)
      rest = [l[idx] for idx in others if idx not in picked]
      for t in kpartitions(rest, k - 1):
        yield [[l[0], *(l[idx] for idx in comb)], *t]
def stirlingsecond(n, k):
  """Return the Stirling number of the second kind S(n, k).

  Adds up comb(k, i) * (k - i)**n over i = 0..k with alternating signs
  (positive for even i) and floor-divides the total by k!.
  """
  import math
  total = 0
  for i in range(k + 1):
    term = math.comb(k, i) * (k - i) ** n
    # odd i contributes negatively
    total += -term if i % 2 else term
  return total // math.factorial(k)
# Self-check: the number of generated partitions must equal S(n, 3).
assert sum(1 for _ in kpartitions([3,5,7,11,13], 3)) == stirlingsecond(5, 3)
assert sum(1 for _ in kpartitions([2,3,5,7,11,13], 3)) == stirlingsecond(6, 3)

这种方法相当高效。不过,为了方便地使用 itertools.combinations,它需要做一些额外的工作来找出不在组合中的元素;如果自己编写一个同时产生组合及其补集(即不在组合中的元素)的组合函数,运行时间可能会有常数倍的改进。

