f# - 如何在 F# 中实现“高效的广义折叠”？

Question

在 Martin 等人的论文中，我读到了针对嵌套数据类型的高效广义折叠。这篇论文讨论的是 Haskell，我想在 F# 中尝试一下。

到目前为止，我设法遵循了这个Nest例子，包括gfold.

// A pair whose two components have the same type.
type Pair<'a> = 'a * 'a
// A nested (non-regular) list: the tail of a Nest<'a> is a Nest<Pair<'a>>,
// so the element type is paired up once more at every Cons.
type Nest<'a> = Nil | Cons of 'a * Nest<Pair<'a>>

// Sample nest with three levels: an int, a pair of ints,
// and a pair of pairs of ints.
let example =
    Cons(1,
        Cons((2, 3),
            Cons(((4, 5), (6, 7)),
                Nil
            )
        )
    )

// Applies f to both components of a pair, first component first.
let pair (f:'a -> 'b) (p:Pair<'a>) : Pair<'b> =
    let a, b = p
    f a, f b

// Maps f over every element of a nest. The recursive call works on the tail,
// whose elements are pairs, so f is lifted with pair before recursing.
let rec nest<'a, 'r> (f:'a -> 'r) (n:Nest<'a>) : Nest<'r> =
    match n with
    | Nil -> Nil
    | Cons(head, tail) -> Cons(f head, nest (pair f) tail)

//val gfold : e:'r -> f:('a * 'r -> 'r) -> g:(Pair<'a> -> 'a) -> _arg1:Nest<'a> -> 'r
// Generalized fold over a nest: e is the result for Nil, f combines an element
// with the folded tail, and g collapses each pair in the tail back to an 'a.
let rec gfold e f g : Nest<'a> -> 'r = function
    | Nil -> e
    | Cons(x, xs) ->
        // nest g xs maps g over the whole remaining tail on every step.
        f(x, gfold e f g (nest g xs))

// Turns a curried two-argument function into one that takes a tuple.
let uncurry f args =
    let a, b = args
    f a b

// Addition on a tuple of two numbers.
let up = uncurry (+)

// Folds the sample nest with 0 as the seed and up as both f and g.
let sum = example |> gfold 0 up up

不幸的是，gfold 似乎具有二次复杂度，这正是作者提出 efold 的原因。正如您可能猜到的那样，efold 就是我没能实现出来的那个。在摆弄了许多类型注释之后，我想出了这个版本，它只剩下一处小波浪线（编译错误）：

// Attempted translation of efold. The recursive call does not type check yet:
// the compiler expects Nest<'a> but is given Nest<Pair<'a>>.
let rec efold<'a, 'b, 'r> (e:'r) (f:'a * 'r -> 'r) (g:(Pair<'a> -> Pair<'a>) -> 'a -> 'a) (h:_) (nest:Nest<'a>) : 'r =
    match nest with
    | Nil -> e
    | Cons(x, xs) -> f(h x, efold e f g ((g << pair) h) xs)
                                                        ^^

唯一剩下的未指定类型是h. 编译器推断val h : ('a -> 'a)，但我认为需要有不同的类型。

提供的错误消息显示

错误类型不匹配。期望一个
Nest<'a>
但给定一个
Nest<Pair<'a>>
当统一 ''a' 和 'Pair<'a>' 时，结果类型将是无限的

给 h 标注正确的类型后，这个错误应该会消失。但我对 Haskell 的理解不够，无法将其翻译成 F#。

另请参阅有关论文中可能出现的错字的讨论。

更新：这是我从kvb的回答中了解到的：

因此，h 将输入类型转换为中间类型，就像在常规折叠中累加器可以是不同类型一样。g 用于将两个中间类型的值归约为一个，而 f 接收一个中间类型的值和一个输入类型的值来生成输出类型的值。当然，e 也属于该输出类型。

h确实直接应用于递归期间遇到的值。g另一方面，仅用于使 h 适用于越来越深的类型。

只看第一个例子，除了应用 h 和推动递归之外，f 本身似乎并没有做太多的工作。但是在复杂的方法中，我可以看到就最终输出的结果而言它是最重要的，也就是说，它才是真正干活的主力。

这对吗？

score 6 · Accepted Answer

Haskell 中的正确定义efold类似于：

-- Efficient generalised fold over Nest; n and m are type constructors.
--   e : result for Nil
--   f : combines the mapped head with the folded tail
--   g : pushes m through a pair so that h can be lifted one level deeper
--   h : maps an element into m
efold :: forall n m b.
    (forall a. n a)->
    (forall a.(m a, n (Pair a)) -> n a)->
    (forall a.Pair (m a) -> m (Pair a))->
    (forall a.(a -> m b) -> Nest a -> n b)
efold e f g h Nil = e
efold e f g h (Cons (x,xs)) = f (h x, efold e f g (g . pair h) xs)

这不能完全翻译为 F#，因为 n 和 m 是“更高种类的类型”（higher-kinded types）——它们是在给定参数时创建类型的类型构造函数——这在 F# 中不受支持（并且在 .NET 中也没有清晰的表示形式）。

解释

您的更新询问如何解释折叠的参数。也许了解折叠如何工作的最简单方法是扩展将折叠应用于示例时发生的情况。你会得到这样的东西：

efold e f g h example ≡
    f (h 1, f ((g << pair h) (2, 3), f ((g << pair (g << pair h)) ((4,5), (6,7)), e)))

所以 h 将值映射为可以作为 f 的第一个参数的类型。g 用于把 h 应用到更深层的嵌套对上（这样我们就可以把 h 从类型为 a -> m b 的函数依次变为 Pair a -> m (Pair b)、Pair (Pair a) -> m (Pair (Pair b)) 等），而 f 沿着脊柱被重复应用，把 h 的结果与对 f 的嵌套调用的结果组合起来。最后，e 只使用一次，作为对 f 最深层嵌套调用的种子。

我认为这个解释与你的推断基本一致。 f对于组合不同层的结果当然是至关重要的。但g也很重要，因为它告诉您如何在一个层中组合各个部分（例如，在对节点求和时，它需要对左右嵌套的总和进行求和；如果您想使用折叠来构建一个新的嵌套，其中的值在每个级别都与输入的级别相反，您将使用g看起来大致像fun (a,b) -> b,a)。

简单的方法

一种选择是为您关心的每一对 n 和 m 创建 efold 的专门实现。例如，如果我们想对包含在 Nest 中的列表的长度求和，那么 n _ 和 m _ 都只是 int。我们可以稍微概括一下，推广到 n _ 和 m _ 不依赖于其参数的情况：

// efold specialized to plain result types: 'n is the overall result type and
// 'm the intermediate type produced by h; neither depends on the element type.
let rec efold<'n,'m,'a> (e:'n) (f:'m*'n->'n) (g:Pair<'m> -> 'm) (h:'a->'m) : Nest<'a> -> 'n = function
| Nil -> e
// g << (pair h) lifts h to the pairs stored one level deeper in the tail.
| Cons(x,xs) -> f (h x, efold e f g (g << (pair h)) xs)

// Sums the elements of the sample nest (h = id, f and g both add).
let total = efold 0 up up id example

另一方面，如果 n 和 m 确实使用了它们的参数，那么您需要定义一个单独的特化版本（另外，您可能需要为每个多态参数创建新类型，因为 F# 对高阶（higher-rank）类型的编码比较笨拙）。例如，要将嵌套中的值收集到一个列表中，您需要 n 'a = list<'a> 和 m 'b = 'b。这时，我们不必为 e 的参数类型定义新类型，因为可以观察到类型 forall 'a. list<'a> 的唯一值是 []，所以我们可以这样写：

// Polymorphic f argument: Apply is a generic method, so one instance works
// for every element type 'a.
type ListIdF =
    abstract Apply : 'a * list<Pair<'a>> -> list<'a>

// Polymorphic g argument: maps a pair to a pair of the same type.
type ListIdG =
    abstract Apply : Pair<'a> -> Pair<'a>

// efold specialized to produce a list<'b>; the Nil case always yields [].
let rec efold<'a,'b> (f:ListIdF) (g:ListIdG) (h:'a -> 'b) : Nest<'a> -> list<'b> = function
| Nil -> []
// pair h >> g.Apply lifts h to the pairs stored in the tail, then applies g.
| Cons(x,xs) -> f.Apply(h x, efold f g (pair h >> g.Apply) xs)

// Flattens a nest into a list: f conses the head onto the tail's pairs
// flattened as [x;y]; g leaves pairs unchanged; h is id.
let toList n = efold { new ListIdF with member __.Apply(a,l) = a::(List.collect (fun (x,y) -> [x;y]) l) } { new ListIdG with member __.Apply(p) = p } id n

复杂的方法

虽然 F# 不直接支持更高种类的类型，但事实证明可以以某种忠实的方式模拟它们。这是 Higher 库采取的方法。下面是它的最小版本的样子。

我们创建一个类型 App<'T,'a> 来表示某个类型应用 T<'a>，同时为每个类型构造函数创建一个虚拟的伴随类型，用作 App<_,_> 的第一个类型参数：

// Encodes the application of the type constructor represented by 'F to 'T.
// The payload is stored untyped (obj) and can only be read back by presenting
// the same token instance that was used to build the App.
type App<'F, 'T>(token : 'F, value : obj) = 
    do
        // Reject a token that is reference-equal to the default value of 'F.
        if obj.ReferenceEquals(token, Unchecked.defaultof<'F>) then
            raise <| new System.InvalidOperationException("Invalid token")

    // Apply the secret token to have access to the encapsulated value
    // Raises InvalidOperationException when token' is not the stored token.
    member self.Apply(token' : 'F) : obj =
        if not (obj.ReferenceEquals(token, token')) then
            raise <| new System.InvalidOperationException("Invalid token")
        value

现在我们可以为我们关心的类型构造函数定义一些伴随类型（这些通常可以存在于一些共享库中）：

// App<Const<'a>, 'b> represents a value of type 'a (that is, ignores 'b)
// Inj wraps a value together with the private token; Prj presents the same
// token to read the stored obj and casts it back to 'a.
type Const<'a> private () =
    static let token = Const ()
    static member Inj (value : 'a) =
        App<Const<'a>, 'b>(token, value)
    static member Prj (app : App<Const<'a>, 'b>) : 'a =
        app.Apply(token) :?> _

// App<List, 'a> represents list<'a>
// Inj wraps a list together with the private token; Prj presents the same
// token to read the stored obj and casts it back to 'a list.
type List private () = 
    static let token = List()
    static member Inj (value : 'a list) =
        App<List, 'a>(token, value)
    static member Prj (app : App<List, 'a>) : 'a list =
        app.Apply(token) :?> _

// App<Id, 'a> represents just a plain 'a
// Inj wraps a value together with the private token; Prj presents the same
// token to read the stored obj and casts it back to 'a.
type Id private () =
    static let token = Id()
    static member Inj (value : 'a) =
        App<Id, 'a>(token, value)
    static member Prj (app : App<Id, 'a>) : 'a =
        app.Apply(token) :?> _

// App<Nest, 'a> represents a Nest<'a>
// Inj wraps a nest together with the private token; Prj presents the same
// token to read the stored obj and casts it back to Nest<'a>.
type Nest private () =
    static let token = Nest()
    static member Inj (value : Nest<'a>) =
        App<Nest, 'a>(token, value)
    static member Prj (app : App<Nest, 'a>) : Nest<'a> =
        app.Apply(token) :?> _

现在我们可以一劳永逸地为有效折叠的参数定义更高等级的类型：

// forall a. n a
// The seed of the fold: Apply must produce an App<'N,'a> for any 'a.
type E<'N> =
    abstract Apply<'a> : unit -> App<'N,'a>

// forall a. (m a, n (Pair a)) -> n a
// Combines the mapped head with the result of folding the tail of pairs.
type F<'M,'N> =
    abstract Apply<'a> : App<'M,'a> * App<'N,'a*'a> -> App<'N,'a>

// forall a. Pair (m a) -> m (Pair a)
// Turns a pair of wrapped values into a wrapped pair.
type G<'M> =
    abstract Apply<'a> : App<'M,'a> * App<'M,'a> -> App<'M,'a*'a>

所以折叠只是：

// Efficient generalized fold using the App encoding for the type constructors
// 'N (result) and 'M (intermediate).
let rec efold<'N,'M,'a,'b> (e:E<'N>) (f:F<'M,'N>) (g:G<'M>) (h:'a -> App<'M,'b>) : Nest<'a> -> App<'N,'b> = function
| Nil -> e.Apply()
// g.Apply << pair h lifts h to the pairs stored in the tail.
| Cons(x,xs) -> f.Apply(h x, efold e f g (g.Apply << pair h) xs)

现在要调用efold，我们需要对各种Inj和Prj方法进行一些调用，否则一切看起来都和我们预期的一样：

// Flattens a nest into a list ('N = List, 'M = Id): the seed is [], f conses
// the head onto the tail's pairs flattened as [x;y], and g builds a plain pair.
let toList n = 
    efold { new E<_> with member __.Apply() = List.Inj [] } 
          { new F<_,_> with member __.Apply(m,n) = Id.Prj m :: (n |> List.Prj |> List.collect (fun (x,y) -> [x;y])) |> List.Inj }
          { new G<_> with member __.Apply(m1,m2) = (Id.Prj m1, Id.Prj m2) |> Id.Inj }
          Id.Inj
          n
    |> List.Prj

// Sums all elements of a nest ('N and 'M are both Const): the seed is 0,
// and f and g both add their two projected arguments.
let sumElements n =
    efold { new E<_> with member __.Apply() = Const.Inj 0 }
          { new F<_,_> with member __.Apply(m,n) = Const.Prj m + Const.Prj n |> Const.Inj }
          { new G<_> with member __.Apply(m1,m2) = Const.Prj m1 + Const.Prj m2 |> Const.Inj }
          Const.Inj
          n
    |> Const.Prj

// Rebuilds a nest with the two components of every pair swapped at each level
// ('N = Nest, 'M = Id): the seed is Nil, f re-conses the head onto the folded
// tail, and g returns its two arguments in swapped order (m2 first, then m1).
let reverse n =
    efold { new E<_> with member __.Apply() = Nest.Inj Nil }
          { new F<_,_> with member __.Apply(m,n) = Cons(Id.Prj m, Nest.Prj n) |> Nest.Inj }
          { new G<_> with member __.Apply(m1,m2) = (Id.Prj m2, Id.Prj m1) |> Id.Inj }
          Id.Inj
          n
    |> Nest.Prj

希望这里的模式是清晰的：在每个对象表达式中，Apply 方法先投影（Prj）出每个参数，对它们进行操作，然后将结果注入（Inj）回 App<_,_> 类型。使用一些 inline 魔法，我们可以使代码看起来更加一致（以增加一些类型注释为代价）：

// Active pattern that calls the static Prj member of the companion type ^T,
// so an App argument can be unwrapped directly in a parameter pattern.
let inline (|Prj|) (app:App< ^T, 'a>) = (^T : (static member Prj : App< ^T, 'a> -> 'b) app)
// Function form of the Prj pattern.
let inline prj (Prj x) = x
// Calls the static Inj member of the companion type ^T to wrap a value.
let inline inj x = (^T : (static member Inj : 'b -> App< ^T, 'a>) x)

// toList rewritten with the inline Prj/inj helpers; the companion types are
// selected by the annotations E<List> and F<Id,_>.
let toList n = 
    efold { new E<List> with member __.Apply() = inj [] } 
          { new F<Id,_> with member __.Apply(Prj m, Prj n) = m :: (n |> List.collect (fun (x,y) -> [x;y])) |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = (m1, m2) |> inj }
          inj
          n
    |> prj

// sumElements rewritten with the inline Prj/inj helpers; the companion types
// are selected by the annotations E<Const<_>> and F<Const<_>,_>.
let sumElements n =
    efold { new E<Const<_>> with member __.Apply() = inj 0 }
          { new F<Const<_>,_> with member __.Apply(Prj m, Prj n) = m + n |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = m1 + m2 |> inj }
          inj
          n
    |> prj

// reverse rewritten with the inline Prj/inj helpers: g returns its two
// projected arguments in swapped order. The seed still uses Nest.Inj explicitly.
let reverse n = 
    efold { new E<_> with member __.Apply() = Nest.Inj Nil }
          { new F<Id,_> with member __.Apply(Prj m,Prj n) = Cons(m, n) |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = (m2, m1) |> inj }
          inj
          n
    |> prj

f# - 如何在 F# 中实现“高效的广义折叠”？

1 回答 1

解释

简单的方法

复杂的方法

Related

Reference