algorithm - 最小化矩阵中的块

Question

假设我有以下矩阵：

原始矩阵

矩阵可以分解成块，使得对于所有行，每个块必须具有相同数量的列，其中该行的值被标记为真。

例如，以下块是有效的：

有效块示例 1

这意味着行不必是连续的。

列也不必是连续的，因为以下是有效的块：

有效块示例 2

但是，以下内容无效：

无效的块示例

也就是说，有什么算法可以用来选择块，以便在找到所有块时使用最少数量的块？

鉴于上面的示例，正确的解决方案是（具有相同颜色的项目表示有效块）：

分块问题的解决方案1

在上面的示例中，三个是可以分解成的最小块数。

请注意，以下也是有效的解决方案：

分块问题2的解决方案

实际上，解决方案并没有偏好，只是为了获得最少数量的块。

我想过使用相邻的单元格进行计数，但这并不能说明列值不必连续的事实。

我相信关键在于在给定约束的情况下找到面积最大的块，删除这些项目，然后重复。

采用这种方法，解决方案是：

分块问题3的解决方案

但是如何遍历矩阵并找到最大的区域却让我望而却步。

另请注意，如果您想在操作期间重新调整行和/或列，这是一个有效的操作（为了找到最大的区域），但我想您只能在从矩阵（在找到一个区域并移动到下一个区域之后）。

score 2 · Accepted Answer

您正在对真值表进行电路最小化。对于 4x4 真值表，您可以使用K 映射。Quine-McCluskey 算法是一种可以处理更大真值表的泛化。

请记住，问题是 NP-Hard，因此根据真值表的大小，这个问题可能会迅速增长到难以处理的规模。

score 0 · Accepted Answer

This problem is strongly related to Biclustering, for which there are many efficient algorithms (and freely available implementations). Usually you will have to specify the number K of clusters you expect to find; if you don't have a good idea what K should be, you can proceed by binary search on K.

In case the biclusters don't overlap, you are done, otherwise you need to do some geometry to cut them into "blocks".

score 0 · Accepted Answer

我提出的解决方案相当简单，但非常耗时。

它可以分为4个主要步骤：

找到矩阵中的所有现有模式，
找到这些模式的所有可能组合，
删除所有不完整的模式集，
扫描剩余列表以获取具有最少元素数的集合

首先，下面的算法适用于列或行主矩阵。我选择了列作为解释，但您可以在方便时将其换成行，只要它在整个过程中保持一致即可。

答案附带的示例代码在 OCaml 中，但不使用该语言的任何特定功能，因此应该很容易移植到其他 ML 方言。

步骤1：

每一列都可以看作是一个位向量。请注意，可以通过交叉（即和ing）所有列或组成它的所有行，甚至是组合来构造模式（您在问题中称为块）。所以第一步实际上是关于产生行和列的所有组合（如果你愿意的话，矩阵的行和列的幂集），同时将它们相交，并过滤掉重复项。

我们考虑矩阵数据类型的以下接口：

 module type MATRIX = sig
    type t
    val w : int (* the width of the matrix *)
    val h : int  (* the height ........             *)
    val get : t -> int -> int -> bool  (* cell value getter *)

end

现在让我们看一下这一步的代码：

let clength = M.h
let rlength = M.w

(* the vector datatype used throughought the algorithm
   operator on this type are in the module V *)
type vector = V.t

(* a pattern description and comparison operators *)
module Pattern = struct
    type t = {
        w : int; (* width of thd pattern *)
        h : int; (* height of the pattern *)
        rows : vector; (* which rows of the matrix are used *)
        cols : vector; (* which columns... *)
    }
    let compare a b = Pervasives.compare a b
    let equal a b = compare a b = 0
end
(* pattern set : let us store patterns without duplicates *)
module PS = Set.Make(Pattern)

(* a simple recursive loop on @f @k times *)
let rec fold f acc k =
    if k < 0 
    then acc
    else fold f (f acc k) (pred k)

(* extract a column/row of the given matrix *)
let cr_extract mget len =
    fold (fun v j -> if mget j then V.set v j else v) (V.null len) (pred len)

let col_extract m i = cr_extract (fun j -> M.get m i j) clength
let row_extract m i = cr_extract (fun j -> M.get m j i) rlength

(* encode a single column as a pattern *)
let col_encode c i =
    { w = 1; h = count c; rows = V.set (V.null clength) i; cols = c }

let row_encode r i =
    { h = 1; w = count r; cols = V.set (V.null rlength) i; rows = r }

(* try to add a column to a pattern *)
let col_intersect p c i =
    let col = V.l_and p.cols c in
    let h = V.count col in
    if h > 0 
    then
        let row = V.set (V.copy p.rows) i in
        Some {w = V.count row; h = h; rows = row; clos = col}
    else None

let row_intersect p r i =
    let row = V.l_and p.rows r in
    let w = V.count row in
    if w > 0
    then
        let col = V.set (V.copy p.cols) i in
        Some { w = w; h = V.count col; rows = row; cols = col }
    else None

let build_patterns m =
    let bp k ps extract encode intersect =
        let build (l,k) =
            let c = extract m k in
            let u = encode c k in
            let fld p ps =
                match intersect p c k with
                      None         -> l
                    | Some npc -> PS.add npc ps
             in
             PS.fold fld (PS.add u q) q, succ k
        in
        fst (fold (fun res _ -> build res) (ps, 0) k)
    in
    let ps = bp (pred rlength) PS.empty col_extract col_encode col_intersect in
    let ps = bp (pred clength) ps row_extract row_encode row_intersect in
    PS.elements ps

V 模块必须符合以下整个算法的签名：

module type V = sig
    type t
    val null : int -> t  (* the null vector, ie. with all entries equal to false *)
    val copy : t -> t (* copy operator *)
    val get : t -> int -> bool (* get the nth element *)
    val set : t -> int -> t (* set the nth element to true *)
    val l_and : t -> t -> t (* intersection operator, ie. logical and *)
    val l_or : t -> t -> t (* logical or *)
    val count : t -> int (* number of elements set to true *)
    val equal : t -> t -> bool (* equality predicate *)
end

第2步：

组合模式也可以看作是一个幂集结构，但有一些限制：一个有效的模式集可能只包含不重叠的模式。如果两种模式都包含至少一个公共矩阵单元，则后者可以定义为真。使用上面使用的模式数据结构，重叠谓词非常简单：

let overlap p1 p2 =
    let nullc = V.null h
    and nullr = V.null w in
    let o v1 v2 n = not (V.equal (V.l_and v1 v2) n) in
    o p1.rows p2.rows nullr && o p1.cols p2.cols nullc

模式记录的cols和rows指示矩阵中的哪些坐标包含在模式中。因此，两个字段上的逻辑和将告诉我们模式是否重叠。

为了在模式集中包含模式，我们必须确保它不与集合中的任何模式重叠。

type pset = {
    n : int; (* number of patterns in the set *)
    pats : pattern list;
}

let overlap sp p =
    List.exists (fun x -> overlap x p) sp.pats

let scombine sp p =
    if overlap sp p
    then None
    else Some {
        n = sp.n + 1;
        pats = p::sp.pats;
    }

let build_pattern_sets l =
    let pset l p = 
        let sp = { n = 1; pats = [p] } in
        List.fold_left (fun l spx -> 
            match scombine spx p with
                None -> l
             | Some nsp -> nsp::l
        ) (sp::l) l
    in List.fold_left pset [] l

这一步会产生很多集合，因此非常占用内存和计算量。这当然是这个解决方案的弱点，但我还没有看到如何减少折叠。

第 3 步：

如果在用它重建矩阵时，我们没有获得原始的模式集，则该模式集是不完整的。所以这个过程相当简单。

let build_matrix ps w =
    let add m p =
        let rec add_col p i = function
            | []     -> []
            | c::cs -> 
                let c = 
                    if V.get p.rows i
                    then V.l_or c p.cols
                    else c
                in c::(add_col p (succ i) cs)
        in add_col p 0 m
    in
    (* null matrix as a list of null vectors *)
    let m = fold (fun l _ -> V.null clength::l) [] (pred rlength) in
    List.fold_left add m ps.pats

let drop_incomplete_sets m l =
    (* convert the matrix to a list of columns *)
    let m' = fold (fun l k -> col_extract m k ::l) [] (pred rlength) in
    let complete m sp =
        let m' = build_matrix sp in
        m = m'
    in List.filter (fun x -> complete m' x) l

第4步：

最后一步就是选择元素数量最少的集合：

let smallest_set l =
    let smallest ps1 ps2 = if ps1.n < ps2.n then ps1 else ps2 in
    match l with
        | []    -> assert false (* there should be at least 1 solution *)
        | h::t  -> List.fold_left smallest h t

整个计算就是每个步骤的链接：

let compute m =
   let (|>) f g = g f in
   build_patterns m |> build_pattern_sets |> drop_incomplete_sets m |> smallest_set

笔记

上面的算法构造了一个幂集的幂集，并带有一些有限的过滤。据我所知，没有减少搜索的方法（如评论中所述，如果这是一个 NP 难题，则没有）。

该算法检查所有可能的解决方案，并正确返回一个最佳解决方案（用许多矩阵测试，包括问题描述中给出的那个。

关于您在问题中提出的启发式的快速评论：

它可以使用第一步轻松实现，删除找到的最大模式并递归。这将比我的算法更快地产生解决方案。但是，找到的解决方案可能不是最佳的。

例如，考虑以下矩阵：

.x...
.xxx
xxx.
...x.

中央 4 细胞块是可能找到的最大的，但使用它的集合将总共包含 5 个模式。

.1...
.223
422.
...5.

然而这个解决方案只使用了 4 个：

.1...
.122
334.
...4.

更新：

链接到我为此答案编写的完整代码。

algorithm - 最小化矩阵中的块

3 回答 3

步骤1：

第2步：

第 3 步：

第4步：

笔记

更新：

Related

Reference