您似乎在描述的是关联规则学习问题的改写,例如可以使用 Apriori 算法解决。
这是一个快速制作的示例,展示了该算法的基础知识。它可能需要一些改进,并且不确定是否没有错误。
它也没有要求它必须找到所有不同的列组合,这些组合提供最少的“x”行。它只使用“x”来更快地过滤所有解决方案,并最终返回一个包含至少“x”行的最大列数的解决方案。
from operator import itemgetter, or_
from itertools import combinations, starmap
from collections import Counter
from math import factorial
class Apriori:
def __init__(self, input, x):
self.input = input
self.cols = len(input[0])
self.rows = len(input)
self.x = x
def _get_freq_combs(self, comb):
return sum(map(lambda row:
sum([bool(e) for i, e in enumerate(row)
if i in comb]) // len(comb), self.input))
def _make_new_combs(self, combs, comb_size):
candidates = Counter(map(frozenset,
starmap(or_, combinations(combs, 2))))
possible_candidate_freq = factorial(comb_size) // factorial(2) \
// factorial(comb_size - 2)
for c, f in candidates.items():
if f == possible_candidate_freq:
yield c
def solve(self):
"""Returns a list of sets with the most common items, at least x."""
freq = [self._get_freq_combs([i]) for i in range(self.cols)]
most_freq = [{ind} for ind in range(self.cols) if freq[ind] >= x]
comb_size = 2
while most_freq:
old_freq = most_freq
most_freq = [c for c in self._make_new_combs(most_freq, comb_size)
if self._get_freq_combs(c) >= x]
comb_size += 1
return old_freq
if __name__ == '__main__':
input = [[1, 0, 1, 0],
[0, 1, 1, 0],
[0, 1, 1, 1],
[0, 0, 0, 1]]
x = 2
apriori = Apriori(input, x)
print(apriori.solve())