1

当我用 python 编写代码时,我总是尝试像使用 F# 一样思考替代方案:

我有一个seq元组,(key, value1, value2, ...)我在这里简化了元组,所以它的长度只有 2。键包含重复的数字。

let xs = Seq.zip [|1;2;2;3;3;3;4;1;1;2;2|] {0..10} // so this is a seq of tuple

[(1, 0),
 (2, 1),
 (2, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (4, 6),
 (1, 7),
 (1, 8),
 (2, 9),
 (2, 10)]

现在,我想构建一个函数,它将seq作为输入,并返回 a seq,它是原始 seq 的子集。

它必须捕获所有更改键的项目,并包括第一个和最后一个项目,seq如果它们不存在的话。

f1(xs) = [(1, 0), (2, 1), (3, 3), (4, 6), (1, 7), (2, 9), (2, 10)]
f1([]) = []

以下是我的 python 代码,它可以工作,但我不太喜欢它。

xs = zip([1,2,2,3,3,3,4,1,1,2,2], range(11))

def f1(xs):
    if not xs:
        return
    last_a = None # I wish I don't have to use None here.
    is_yield = False
    for a, b in xs:
        if a != last_a:
            last_a = a
            is_yield = True
            yield (a, b)
        else:
            is_yield = False
    if not is_yield:
        yield (a, b) # Ugly, use variable outside the loop.

print list(f1(xs))
print list(f1([]))

这是另一种方式,使用itertools

def f1(xs):
    group = None
    for _, group_iter in itertools.groupby(xs, key = lambda pair: pair[0]):
        group = list(group_iter)
        yield group[0]

    # make sure we yield xs[-1], doesn't work if xs is iterator.
    if group and len(group) > 1: # again, ugly, use variable outside the loop.
        yield group[-1]

在 F# 中,与 python 中Seq.groupBy的行为不同groupby。我想知道如何尽可能地解决这个问题,减少参考单元,减少mutable,并且没有太多麻烦。

4

5 回答 5

3

最简单的解决方案可能是将序列转换为数组,并将 John 的方法与通过索引抓取第一个和最后一个元素结合起来。但是,这是另一种添加到组合中的解决方案:

let f getKey (items: seq<_>) =
  use e = items.GetEnumerator()
  let rec loop doYield prev =
    seq {
      if doYield then yield prev
      if e.MoveNext() then 
        yield! loop (getKey e.Current <> getKey prev) e.Current
      elif not doYield then yield prev
    }
  if e.MoveNext() then loop true e.Current
  else Seq.empty

//Usage: f fst xs
于 2013-10-28T21:33:56.340 回答
3

一个应该可以工作,但也不是特别漂亮或短的递归解决方案可能看起来像这样 - 但使用模式匹配肯定会让这更好一点:

let whenKeyChanges input = seq {
  /// Recursively iterate over input, when the input is empty, or we found the last
  /// element, we just return it. Otherwise, we check if the key has changed since
  /// the last produced element (and return it if it has), then process the rest 
  let rec loop prevKey input = seq {
    match input with
    | [] -> ()
    | [last] -> yield last
    | (key, value)::tail ->
        if key <> prevKey then yield (key, value)
        yield! loop key tail }

  // Always return the first element if the input is not empty
  match List.ofSeq input with
  | [] -> ()
  | (key, value)::tail -> 
      yield (key, value)
      yield! loop key tail }

如果您想要一个更好、更具声明性的解决方案,那么您可以使用我一直在 BlueMountain Capital 工作的数据框架和时间序列库(尚未正式宣布,但应该可以使用)。

// Series needs to have unique keys, so we add an index to your original keys
// (so we have series with (0, 1) => 0;  (1, 2) => 1; ...
let xs = series <| Seq.zip (Seq.zip [0..10] [1;2;2;3;3;3;4;1;1;2;2]) [0..10]

// Create chunks such that your part of the key is the same in each chunk
let chunks = xs |> Series.chunkWhile (fun (_, k1) (_, k2) -> k1 = k2)

// For each chunk, return the first element, or the first and the last
// element, if this is the last chunk (as you always want to include the last element)
chunks 
|> Series.map (fun (i, k) chunk -> 
    let f = Series.firstValue chunk
    let l = Series.lastValue chunk
    if (i, k) = Series.lastKey chunks then
      if f <> l then [k, f; k, l] else [k, l]
    else [k, f]) 
// Concatenate the produced values into a single sequence
|> Series.values |> Seq.concat

分块是您在此处需要的关键操作(请参阅文档)。唯一棘手的事情是返回最后一个元素 - 可以通过多种不同的方式处理 - 不确定我使用的那个是否是最好的。

于 2013-10-28T20:55:54.287 回答
1

我认为这样的事情会奏效

let remove dup = 
    dup
    |> Seq.pairwise
    |> Seq.filter (fun ((a,b),(c,d)) -> a <> c)
    |> Seq.map fst     
于 2013-10-28T20:35:56.537 回答
0

很抱歉在这里迟到了。虽然到目前为止的答案非常好,但我觉得它们并没有表达对可变状态的基本需求,以便返回最后一个元素。虽然我也可以依赖 IEnumerable 方法,但序列表达式基本上是等价的。我们首先定义一个三向 DU 来封装状态。

type HitOrMiss<'T> =
| Starting
| Hit of 'T
| Miss of 'T

let foo2 pred xs = seq{
    let store = ref Starting        // Save last element and state
    for current in xs do            // Iterate sequence
        match !store with           // What had we before?
        | Starting ->               // No element yet
            yield current           // Yield first element
            store := Hit current
        | Hit last                  // Check if predicate is satisfied
        | Miss last when pred last current ->
            yield current           // Then yield intermediate element
            store := Hit current
        | _ ->
            store := Miss current
    match !store with
    | Miss last ->                  // Yield last element, if not already
        yield last
    | _ -> () }

[(1, 0)
 (2, 1)
 (2, 2)
 (3, 3)
 (3, 4)
 (3, 5)
 (4, 6)
 (1, 7)
 (1, 8)
 (2, 9)
 (2, 10)]
|> foo2 (fun (a,_) (b,_) -> a <> b) |> Seq.toList |> printfn "%A"
// [(1, 0); (2, 1); (3, 3); (4, 6); (1, 7); (2, 9); (2, 10)]
于 2013-10-29T10:17:42.000 回答
0

为了满足关于最后一个元素的特殊情况,正确的解决方案需要知道序列的结尾。因此,要么需要两次通过,以便在处理之前知道长度(例如 Tomas 的解决方案 - 第一次通过是复制到列表,这与 seq 在迭代时暴露其“结束”不同),要么你需要依赖IEnumerable方法以便你知道您在到达终点时进行迭代(例如 Daniel 的解决方案)。

下面的灵感来自约翰的代码的优雅,但通过预先获取长度(2-pass)来处理特殊情况。

let remove dup =
    let last = Seq.length dup - 2
    seq{
        yield Seq.head dup
        yield! dup
               |> Seq.pairwise
               |> Seq.mapi (fun i (a,b) -> if fst a <> fst b || i = last then Some(b) else None)
               |> Seq.choose id
    }
于 2013-10-29T05:00:55.723 回答