f# - 了解 F# 中的可变性：案例研究

Question

我是 F# 的初学者，这是我第一次尝试编写一些严肃的东西。很抱歉，代码有点长，但是我不明白可变性存在一些问题。

这是 Karger MinCut 算法的一个实现，用于计算无向图组件上的 mincut。我不会在这里讨论算法是如何工作的，更多信息https://en.wikipedia.org/wiki/Karger%27s_algorithm 重要的是它是一个随机算法，它运行确定数量的试运行，并采取“最佳”运行。

我现在意识到，如果我确实为每个随机试验构建了一个特定的函数，我可以避免下面的很多问题，但我想确切地了解下面的实现中有什么问题。

我在这个简单的图上运行代码（当我们将图切割成 2 个组件（1、2、3、4）和（5、6、7、8）时，最小切为 2，这两个组件之间只有 2 条边)

3--4-----5--6
|\/|     |\/|
|/\|     |/\|
2--1-----7--8

该文件simplegraph.txt应将此图编码如下（第一列 = 节点号，其他列 = 链接）

这段代码可能看起来太像命令式编程了，对此我很抱歉。

所以有一个 main for i 循环调用每个试验。第一次执行，（当 i=1 时）看起来很顺利和完美，但是当 i=2 时我有运行时错误执行，因为它看起来有些变量，比如 WG 没有正确重新初始化，导致超出范围的错误。

WG、WG1 和 WGmin 是字典对象 type wgraphobj的记录WG1 在主循环之外定义，我没有对 WG1 进行新的分配。 [但是它的类型是可变的，唉]

我用指令定义了第一个 WG

let mutable WG = WG1

然后在 for i 循环的开头，我写

WG <- WG1

然后，我在每次试验中修改 WG 对象以进行一些计算。当试验结束并且我们进入下一个试验（i 增加）时，我想将 WG 重置为其初始状态，如 WG1。但它似乎不起作用，我不明白为什么......

这是完整的代码

MyModule.fs [一些不需要执行的函数]

namespace MyModule

   module Dict =
      open System.Collections.Generic
      let toSeq d = d |> Seq.map (fun (KeyValue(k,v)) -> (k,v))
      let toArray (d:IDictionary<_,_>) = d |> toSeq |> Seq.toArray
      let toList (d:IDictionary<_,_>) = d |> toSeq |> Seq.toList
      let ofMap (m:Map<'k,'v>) = new Dictionary<'k,'v>(m) :> IDictionary<'k,'v>
      let ofList (l:('k * 'v) list) = new Dictionary<'k,'v>(l |> Map.ofList) :> IDictionary<'k,'v>
      let ofSeq (s:('k * 'v) seq) = new Dictionary<'k,'v>(s |> Map.ofSeq) :> IDictionary<'k,'v>
      let ofArray (a:('k * 'v) []) = new Dictionary<'k,'v>(a |> Map.ofArray) :> IDictionary<'k,'v>

卡格.fs

open MyModule.Dict

open System.IO

let x = File.ReadAllLines "\..\simplegraph.txt";;
// val x : string [] =

let splitAtTab (text:string)=
    text.Split [|'\t';' '|]

let splitIntoKeyValue (s:seq<'T>) = 
    (Seq.head s, Seq.tail s)

let parseLine (line:string)=
    line
    |> splitAtTab
    |> Array.filter (fun s -> not(s=""))
    |> Array.map (fun s-> (int s))
    |> Array.toSeq
    |> splitIntoKeyValue

let y =
    x |> Array.map parseLine

open System.Collections.Generic
// let graph = new Map <int, int array>
let graphD = new Dictionary<int,int seq>()
y |> Array.iter graphD.Add
let graphM = y |> Map.ofArray //immutable 

let N = y.Length // number of nodes
let Nruns = 2 

let remove_table = new Dictionary<int,bool>()
[for i in 1..N do yield (i,false)] |> List.iter remove_table.Add

// let remove_table = seq [|for a in 1 ..N -> false|] // plus court

let label_head_table = new Dictionary<int,int>()
[for i in 1..N do yield (i,i)] |> List.iter label_head_table.Add

let label = new Dictionary<int,int seq>()
[for i in 1..N do yield (i,[i])] |> List.iter label.Add

let mutable min_cut = 1000000

type wgraphobj =
     { Graph : Dictionary<int,int seq>
       RemoveTable : Dictionary<int,bool>
       Label : Dictionary<int,int seq>
       LabelHead : Dictionary<int,int> }

let WG1 = {Graph = graphD;
          RemoveTable = remove_table;
          Label = label;
          LabelHead = label_head_table}

let mutable WGmin = WG1

let IsNotRemoved x = // 
    match x with 
    | (i,false) -> true
    | (i,true)  -> false

let IsNotRemoved1 WG i = //
    (i,WG.RemoveTable.[i]) |>IsNotRemoved

let GetLiveNode d = 
    let myfun x =
        match x with
        | (i,b) -> i
    d |> toList |> List.filter IsNotRemoved |> List.map myfun

let rand = System.Random()
// subsets a dictionary given a sub_list of keys
let D_Subset (dict:Dictionary<'T,'U>) (sub_list:list<'T>) = 
    let z = Dictionary<'T,'U>() // create new empty dictionary
    sub_list |> List.filter (fun k -> dict.ContainsKey k)
             |> List.map (fun k -> (k, dict.[k]))
             |> List.iter (fun s -> z.Add s)
    z

// subsets a dictionary given a sub_list of keys to remove
let D_SubsetC (dict:Dictionary<'T,'U>) (sub_list:list<'T>) =
    let z = dict
    sub_list |> List.filter (fun k -> dict.ContainsKey k)
                          |> List.map (fun k -> (dict.Remove k)) |>ignore
    z

// subsets a sequence by values in a sequence
let S_Subset (S:seq<'T>)(sub_list:seq<'T>) =
    S |> Seq.filter (fun s-> Seq.exists (fun elem -> elem = s) sub_list)

let S_SubsetC (S:seq<'T>)(sub_list:seq<'T>) =
    S |> Seq.filter (fun s-> not(Seq.exists (fun elem -> elem = s) sub_list))

[<EntryPoint>]
let main argv = 
    let mutable u = 0
    let mutable v = 0
    let mutable r = 0
    let mutable N_cut = 1000000
    let mutable cluster_A_min = seq [0]
    let mutable cluster_B_min = seq [0]
    let mutable WG = WG1
    let mutable LiveNodeList = [0]

    // when i = 2, i encounter problems with mutability

    for i in 1 .. Nruns do
         WG <- WG1
         printfn "%d" i
         for k in 1..(N-2) do
             LiveNodeList <- GetLiveNode WG.RemoveTable
             r <- rand.Next(0,N-k)
             u <- LiveNodeList.[r] //selecting a live node
             let uuu  = WG.Graph.[u] |> Seq.map (fun s -> WG.LabelHead.[s] )
                                     |> Seq.filter (IsNotRemoved1 WG)
                                     |> Seq.distinct
             let n_edge =  uuu |> Seq.length
             let x = rand.Next(1,n_edge)
             let mutable ok = false //maybe we can take this out
             while not(ok) do
                  // selecting the edge from node u
                  v <- WG.LabelHead.[Array.get (uuu |> Seq.toArray) (x-1)]

                  let vvv = WG.Graph.[v]  |> Seq.map (fun s -> WG.LabelHead.[s] )
                                          |> Seq.filter (IsNotRemoved1 WG)
                                          |> Seq.distinct
                  let zzz = S_SubsetC (Seq.concat [uuu;vvv] |> Seq.distinct) [u;v]
                  WG.Graph.[u] <- zzz

                  let lab_u = WG.Label.[u]
                  let lab_v = WG.Label.[v]
                  WG.Label.[u] <- Seq.concat [lab_u;lab_v] |> Seq.distinct

                  if (k<N-1) then 
                      WG.RemoveTable.[v]<-true
                      //updating Label_head for all members of Label.[v]
                      WG.LabelHead.[v]<- u
                      for j in WG.Label.[v] do
                          WG.LabelHead.[j]<- u

                  ok <- true
                  printfn "u= %d v=%d" u v
             // end of for k in 1..(N-2)
         // counting cuts
         // u,v contain the 2 indexes of groupings
         let cluster_A = WG.Label.[u]
         let cluster_B = S_SubsetC (seq[for i in 1..N do yield i]) cluster_A // defined as complementary of A
         // let WG2 = {Graph = D_Subset WG1.Graph (cluster_A |> Seq.toList)
         //          RemoveTable = remove_table
         //           Label = D_Subset WG1.Graph (cluster_A |> Seq.toList)
         //          LabelHead = label_head_table}
         let cross_edge = // returns keyvalue pair (k,S')
             let IsInCluster cluster (k,S) =
                 (k,S_Subset S cluster)                    
             graphM |> toSeq |> Seq.map (IsInCluster cluster_B)

         N_cut <-
             cross_edge |> Seq.map (fun (k:int,v:int seq)-> Seq.length v)
                        |> Seq.sum
         if (N_cut<min_cut) then
             min_cut <- N_cut
             WGmin <- WG
             cluster_A_min <- cluster_A
             cluster_B_min <- cluster_B
    // end of for i in 1..Nruns


    0 // return an integer exit code

算法描述：（我认为解决我的问题不太重要）

在每个试验中，有几个步骤。在每一步，我们将 2 个节点合并为 1 个，（有效地删除 1 个）更新图。我们这样做了 6 次，直到只剩下 2 个节点，我们将其定义为 2 个集群，我们查看这 2 个集群之间的交叉边数。如果我们“幸运”，那两个簇将是 (1,2,3,4) 和 (5,6,7,8) 并找到正确的切割数量。在每个步骤中，对象 WG 都会通过合并 2 个节点的效果进行更新，其中只有 LiveNode（由于合并 2 个节点而没有消除的节点）完美地保持最新状态。

WG.Graph 是更新后的图

WG.Label 包含已合并到当前节点的节点的标签

WG.LabelHead 包含该节点已合并到的节点的标签

WG.RemoveTable 表示节点是否已被删除。

提前感谢任何愿意看看它的人！

score 2 · Accepted Answer

“它似乎不起作用”，因为wgraphobj它是一个在堆栈上分配的引用类型，这意味着当你改变的内部结构时WG，你也在改变的内部结构WG1，因为它们是相同的内部结构。

如果你使用可变状态，这正是你让自己陷入的那种混乱。这就是为什么人们建议不要使用它的原因。特别是，您使用可变字典会破坏算法的稳健性。我建议改用 F# 自己的高效不可变字典（称为Map）。

现在，回应您关于WG.Graph <- GraphD给出编译错误的评论。

WG是可变的，但WG.Graph不是（但的内容又是WG.Graph可变的）。有区别，我试着解释一下。

WG是可变的，因为它指向某个类型的对象wgraphobj，但是您可以在程序的过程中使其指向另一个相同类型的对象。

WG.Graph，另一方面，是一个包装在里面 WG的字段。它指向某个类型的对象Dictionary<_,_>。你不能让它指向另一个对象。您可以创建不同wgraphobj的，其中的字段Graph指向不同的字典，但您不能更改Graph原始wgraphobj指向的字段的位置。

为了使字段Graph本身可变，您可以这样声明它：

  type wgraphobj = { 
    mutable Graph: Dictionary<int, int seq>
    ...

然后您将能够改变该字段：

  WG.Graph <- GraphD

请注意，在这种情况下，您不需要将值WG本身声明为mutable.

但是，在我看来，为了您的目的，您实际上可以创建一个更改wgraphobj字段的新实例Graph，并将其分配给可变引用WG：

  WG.Graph <- { WG with Graph = GraphD }

f# - 了解 F# 中的可变性：案例研究

1 回答 1

Related

Reference