6

我找到了一篇文章:
在 F# 中使用 continuation-passing style with memoization 解决 0-1 背包问题

关于在 F# 中实现的背包问题。当我学习这门语言时,我发现这真的很有趣,并试图对此进行一些调查。这是我制作的代码:

open System
open System.IO 
open System.Collections.Generic

let parseToTuple (line : string) =
    let parsedLine = line.Split(' ') |> Array.filter(not << String.IsNullOrWhiteSpace)         |> Array.map Int32.Parse
    (parsedLine.[0], parsedLine.[1])

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        if cache.ContainsKey(x)
            then cache.[x]
        else
            let res = f x
            cache.[x] <- res
            res

type Item =
    {
        Value : int
        Size  : int
    }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let set1 =
    [
        (4, 11)
        (8, 4)
        (10, 5)
        (15, 8)
        (4, 3)
    ]

let set2 =
    [
        (50, 341045); (1906, 4912); (41516, 99732); (23527, 56554); (559, 1818); (45136, 108372); (2625, 6750); (492, 1484)
        (1086, 3072); (5516, 13532); (4875, 12050); (7570, 18440); (4436, 10972); (620, 1940); (50897, 122094); (2129, 5558)
        (4265, 10630); (706, 2112); (2721, 6942); (16494, 39888); (29688, 71276); (3383, 8466); (2181, 5662); (96601, 231302)
        (1795, 4690); (7512, 18324); (1242, 3384); (2889, 7278); (2133, 5566); (103, 706); (4446, 10992); (11326, 27552)
        (3024, 7548); (217, 934); (13269, 32038); (281, 1062); (77174, 184848); (952, 2604); (15572, 37644); (566, 1832)
        (4103, 10306); (313, 1126); (14393, 34886); (1313, 3526); (348, 1196); (419, 1338); (246, 992); (445, 1390)
        (23552, 56804); (23552, 56804); (67, 634)
    ]

[<EntryPoint>] 
let main args =
    // prepare list of items from a file args.[0]
    let header, items = set1
                        |> function
                           | h::t -> h, t
                           | _    -> raise (Exception("Wrong data format"))

    let N, K = header
    printfn "N = %d, K = %d" N K
    let items = List.map (fun x -> {Value = fst x ; Size = snd x}) items |> Array.ofList

    let rec combinations =
        let innerSolver key =
            cont
                {
                    match key with
                    | (i, k) when i = 0 || k = 0        -> return 0
                    | (i, k) when items.[i-1].Size > k  -> return! combinations (i-1, k)
                    | (i, k)                            -> let item = items.[i-1]
                                                           let! v1 = combinations (i-1, k)
                                                           let! beforeItem = combinations (i-1, k-item.Size)
                                                           let v2 = beforeItem + item.Value
                                                           return max v1 v2
                }
        memoize innerSolver

    let res = combinations (N, K) id
    printfn "%d" res
    0

然而,这个实现的问题是它非常慢(实际上我无法解决 50 个项目和约 300000 容量的问题,我在 C# 中的幼稚实现在不到 1 秒的时间内就解决了这个问题)。

如果我在某个地方犯了错误,你能告诉我吗?或者也许实现是正确的,这只是解决这个问题的低效方法。

4

2 回答 2

7

当您天真地应用这样的通用记忆器并使用延续传递时,记忆缓存中的值是continuations,而不是常规的“最终”结果。因此,当您获得缓存命中时,您不会返回最终结果,而是返回一些函数,该函数承诺在您调用它时计算结果。此调用可能很昂贵,可能会调用各种其他延续,可能最终会再次访问 memoization 缓存,等等。

有效地记忆连续传递函数,以便 a)缓存充分发挥作用,b)函数保持尾递归是相当困难的。阅读讨论并在您完全理解所有内容后回来。;-)

您链接的博客文章的作者正在使用一个更复杂、更通用的记忆器,它特别适合这个问题。诚然,我还没有完全理解它(博客上的代码不完整/损坏,所以很难尝试),但我认为它的要点是它在缓存最终整数之前“强制”延续链结果。

为了说明这一点,这里对您的代码进行了快速重构,该代码完全自包含并跟踪相关信息:

open System
open System.Collections.Generic

let mutable cacheHits = 0
let mutable cacheMisses = 0

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            printfn "Miss for %A" x
            let res = f x
            cache.[x] <- res
            res

type Item = { Value : int; Size  : int }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let genItems n = 
   [| for i = 1 to n do
         let size = i % 5
         let value = (size * i)
         yield { Value = value; Size = size }
   |]

let N, K = (5, 100)
printfn "N = %d, K = %d" N K

let items = genItems N

let rec combinations_cont =
    memoize (
     fun key ->
       cont {
                match key with
                | (0, _) | (_, 0)                   -> return 0
                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont (i - 1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let! v1 = combinations_cont (i-1, k)
                                                       let! beforeItem = combinations_cont (i-1, k - item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       return max v1 v2
        }
    )

let res = combinations_cont (N, K) id
printfn "Answer: %d" res
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let rec combinations_plain =
    memoize (
     fun key ->
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain (i-1, k)
                                                       let beforeItem = combinations_plain (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    )

cacheHits <- 0
cacheMisses <- 0

let res2 = combinations_plain (N, K)
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses

如您所见,CPS 版本正在缓存延续(不是整数),并且在调用延续时会有很多额外的活动在最后进行。

如果您将问题大小提高到let (N, K) = (20, 100)(并删除printfnmemoizer 中的语句),您将看到 CPS 版本最终进行了超过 100 万次缓存查找,而普通版本仅进行了几百次。

于 2013-07-04T00:02:30.363 回答
6

在 FSI 中运行此代码:

open System
open System.Diagnostics
open System.Collections.Generic

let time f =
    System.GC.Collect()
    let sw = Stopwatch.StartNew()
    let r = f()
    sw.Stop()
    printfn "Took: %f" sw.Elapsed.TotalMilliseconds
    r

let mutable cacheHits = 0
let mutable cacheMisses = 0

let memoize f =
    let cache = Dictionary<_, _>()
    fun x ->
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            let res = f x
            cache.[x] <- res
            res

type Item = { Value : int; Size  : int }  

type ContinuationBuilder() = 
    member b.Bind(x, f) = fun k -> x (fun x -> f x k)
    member b.Return x = fun k ->  k x
    member b.ReturnFrom x = x

let cont = ContinuationBuilder()

let genItems n = 
    [| for i = 1 to n do
            let size = i % 5
            let value = (size * i)
            yield { Value = value; Size = size }
    |]

let N, K = (80, 400)
printfn "N = %d, K = %d" N K

let items = genItems N

//let rec combinations_cont =
//    memoize (
//     fun key ->
//       cont {
//                match key with
//                | (0, _) | (_, 0)                   -> return 0
//                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont (i - 1, k) 
//                | (i, k)                            -> let item = items.[i-1]
//                                                       let! v1 = combinations_cont (i-1, k)
//                                                       let! beforeItem = combinations_cont (i-1, k - item.Size)
//                                                       let v2 = beforeItem + item.Value
//                                                       return max v1 v2
//        }
//    )
//
//
//cacheHits <- 0
//cacheMisses <- 0

//let res = time(fun () -> combinations_cont (N, K) id)
//printfn "Answer: %d" res
//printfn "Memo hits: %d" cacheHits
//printfn "Memo misses: %d" cacheMisses
//printfn ""

let rec combinations_plain =
    memoize (
        fun key ->
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain (i-1, k)
                                                       let beforeItem = combinations_plain (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    )

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_plain"
let res2 = time (fun () -> combinations_plain (N, K))
printfn "Answer: %d" res2
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let recursivelyMemoize f =
    let cache = Dictionary<_, _>()
    let rec memoizeAux x =
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            let res = f memoizeAux x
            cache.[x] <- res
            res
    memoizeAux

let combinations_plain2 =
    let combinations_plain2Aux combinations_plain2Aux key =
                match key with
                | (i, k) when i = 0 || k = 0        -> 0
                | (i, k) when items.[i-1].Size > k  -> combinations_plain2Aux (i-1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let v1 = combinations_plain2Aux (i-1, k)
                                                       let beforeItem = combinations_plain2Aux (i-1, k-item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       max v1 v2
    let memoized = recursivelyMemoize combinations_plain2Aux
    fun x -> memoized x

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_plain2"
let res3 = time (fun () -> combinations_plain2 (N, K))
printfn "Answer: %d" res3
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

let recursivelyMemoizeCont f =
    let cache = Dictionary HashIdentity.Structural
    let rec memoizeAux x k =
        match cache.TryGetValue(x) with
        | (true, v) -> 
            cacheHits <- cacheHits + 1
            //printfn "Hit for %A - Result is %A" x v
            k v
        | _ ->
            cacheMisses <- cacheMisses + 1
            //printfn "Miss for %A" x
            f memoizeAux x (fun y ->
                cache.[x] <- y
                k y)
    memoizeAux

let combinations_cont2 =
    let combinations_cont2Aux combinations_cont2Aux key =
        cont {
                match key with
                | (0, _) | (_, 0)                   -> return 0
                | (i, k) when items.[i-1].Size > k  -> return! combinations_cont2Aux (i - 1, k) 
                | (i, k)                            -> let item = items.[i-1]
                                                       let! v1 = combinations_cont2Aux (i-1, k)
                                                       let! beforeItem = combinations_cont2Aux (i-1, k - item.Size)
                                                       let v2 = beforeItem + item.Value
                                                       return max v1 v2
        }
    let memoized = recursivelyMemoizeCont combinations_cont2Aux
    fun x -> memoized x id

cacheHits <- 0
cacheMisses <- 0

printfn "combinations_cont2"
let res4 = time (fun () -> combinations_cont2 (N, K))
printfn "Answer: %d" res4
printfn "Memo hits: %d" cacheHits
printfn "Memo misses: %d" cacheMisses
printfn ""

我得到这些结果:

N = 80, K = 400
combinations_plain
Took: 7.191000
Answer: 6480
Memo hits: 6231
Memo misses: 6552

combinations_plain2
Took: 6.310800
Answer: 6480
Memo hits: 6231
Memo misses: 6552

combinations_cont2
Took: 17.021200
Answer: 6480
Memo hits: 6231
Memo misses: 6552
  • combinations_plain来自拉特金的回答。
  • combinations_plain2显式地公开递归记忆步骤。
  • combinations_cont2将递归记忆功能改编为记忆延续结果的功能。
  • combinations_cont2通过在将结果传递给实际延续之前拦截延续中的结果来工作。对同一个键的后续调用提供了一个延续,这个延续是我们最初截获的答案。

这表明我们能够:

  1. 使用延续传递风格进行记忆。
  2. 实现与 vanilla memoized 版本相似的(ish)性能特征。

我希望这能澄清一点。抱歉,我的博客代码片段不完整(我想我最近重新格式化时可能丢失了它)。

于 2013-07-04T07:01:01.133 回答