2

我对以下方法很满意。它需要一个可枚举和一个排序的、不相交的范围列表,并跳过不在范围内的项目。如果范围为空,我们只需遍历每个项目。可枚举和范围列表都可能很大。我们希望这种方法具有尽可能高的性能。

有人能想到一段更优雅的代码吗?我主要对 C# 实现感兴趣,但如果有人有一个三字符的 APL 实现,那也很酷。

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges) 
{
    Debug.Assert(ranges == null || ranges.Count > 0);

    int currentItem = 0;
    Pair<int, int> currentRange = new Pair<int, int>();
    int currentRangeIndex = -1;
    bool betweenRanges = false;
    if (ranges != null) 
    {
        currentRange = ranges[0];
        currentRangeIndex = 0;
        betweenRanges = currentRange.First > 0;
    }

    foreach (T item in source) 
    {
        if (ranges != null) {
            if (betweenRanges) {
                if (currentItem == currentRange.First)
                    betweenRanges = false;
                else {
                    currentItem++;
                    continue;
                }
            }
        }

        yield return item;

        if (ranges != null) {
            if (currentItem == currentRange.Second) {
                if (currentRangeIndex == ranges.Count - 1)
                    break; // We just visited the last item in the ranges

                currentRangeIndex = currentRangeIndex + 1;
                currentRange = ranges[currentRangeIndex];
                betweenRanges = true;
            }
        }

        currentItem++;
    }
}
4

6 回答 6

0

也许在你的source东西上使用 linq:

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    if(ranges == null)
        return null;
    return source.Where((item, index) => ranges.Any(y => y.First < index && y.Second > index)).AsEnumerable();
}

我面前没有我的 Windows PC,我不确定我是否正确理解了您的代码,但我尝试理解您的文本,而上面的代码可以工作....或类似的东西。

更新:关于性能问题,我建议您通过一些简单的测试和时间这两个功能来测试性能。

于 2010-11-24T20:26:59.740 回答
0

我的第二次尝试,这将考虑范围的顺序。我还没有尝试过,但我认为它有效:)。您可能可以将一些代码提取到更小的函数中以使其更具可读性。

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    int currentIndex = 0;
    int currentRangeIndex = 0;
    int maxRangeIndex = ranges.Length;
    bool done = false;
    foreach(var item in source)
    {
        if(currentIndex > range[currentRangeIndex].Second)
        {
           while(currentIndex > range[currentRangeIndex].Second)
           {
               if(!++currentRangeIndex < maxRangeIndex)
               {
                   // We've passed last range => 
                   // set done = true to break outer loop and then break
                   done = true;
                   break;
               }
           }
           if(currentIndex > range[currentRangeIndex].First)
               yield item; // include if larger than first since we now it's smaller than second
        }
        else if(currentIndex > range[currentRangeIndex].First)
        {
            // If higher than first and lower than second we're in range
            yield item;
        }
        if(done) // if done break outer loop
            break;

        currentIndex++; // always increase index when advancint through source
    }
}
于 2010-11-24T22:38:49.357 回答
0

这是我的看法。我发现它更容易理解,如果不是更优雅的话。

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    if (ranges == null)
        return source;

    Debug.Assert(ranges.Count > 0);
    return WalkRangesInternal(source, ranges);
}

static IEnumerable<T> WalkRangesInternal<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    int currentItem = 0;
    var rangeEnum = ranges.GetEnumerator();
    bool moreData = rangeEnum.MoveNext();

    using (var sourceEnum = source.GetEnumerator())
        while (moreData)
        {
            // skip over every item in the gap between ranges
            while (currentItem < rangeEnum.Current.Item1
               && (moreData = sourceEnum.MoveNext()))
                currentItem++;
            // yield all the elements in the range
            while (currentItem <= rangeEnum.Current.Item2
               && (moreData = sourceEnum.MoveNext()))
            {
                yield return sourceEnum.Current;
                currentItem++;
            }
            // advance to the next range
            moreData = rangeEnum.MoveNext();
        }
}
于 2010-11-24T20:47:13.973 回答
0

这个(未经测试)怎么样?应该具有非常相似的性能特征(纯流,没有不必要的缓冲,快速退出),但更容易遵循,IMO:

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source,
                                           List<Pair<int, int>> ranges)
{
    if (source == null)
        throw new ArgumentNullException("source");

    // If ranges is null, just return the source. From spec.
    return ranges == null ? source : RangeIterate(source, ranges);
}

private static IEnumerable<T> RangeIterate<T>(IEnumerable<T> source, 
                                              List<Pair<int, int>> ranges)
{
    // The key bit: a lazy sequence of all valid indices belonging to
    // each range. No buffering.
    var validIndices = from range in ranges
                       let start = Math.Max(0, range.First)
                       from validIndex in Enumerable.Range(start, range.Second - start + 1)
                       select validIndex;

    int currentIndex = -1;

    using (var indexErator = validIndices.GetEnumerator())
    {
        // Optimization: Get out early if there are no ranges.
        if (!indexErator.MoveNext())
            yield break;

        foreach (var item in source)
        {
            if (++currentIndex == indexErator.Current)
            {
               // Valid index, yield.
                yield return item;

                // Move to the next valid index.
                // Optimization: get out early if there aren't any more.
                if (!indexErator.MoveNext())
                    yield break;
            }
        }
    }
}

如果你不介意缓冲索引,你可以做这样的事情,这更清楚,IMO:

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, 
                                           List<Pair<int, int>> ranges)
{
    if (source == null)
        throw new ArgumentNullException("source");

    if (ranges == null)
        return source;

    // Optimization: Get out early if there are no ranges.    
    if (!ranges.Any())
        return Enumerable.Empty<T>();

    var validIndices = from range in ranges
                       let start = Math.Max(0, range.First)
                       from validIndex in Enumerable.Range(start, range.Second - start + 1)
                       select validIndex;

    // Buffer the valid indices into a set.
    var validIndicesSet = new HashSet<int>(validIndices);

    // Optimization: don't take an item beyond the last index of the last range.
    return source.Take(ranges.Last().Second + 1)
                 .Where((item, index) => validIndicesSet.Contains(index));

}
于 2010-11-24T20:48:45.050 回答
0

您可以将源列表复制到一个数组,然后对于每个范围,您可以阻止从新源数组复制到适当位置的目标数组。如果您可以将源集合作为数组传入,那将是一种更好的方法。如果您确实必须进行初始复制,则该操作为 O(N) 加上 O(M),其中 M 是最终数组中的项目总数。因此,无论哪种情况,它最终都会出现 O(N)。

于 2010-11-24T20:40:06.277 回答
0

您可以手动迭代集合,以防止枚举器在被跳过时获取当前项目:

public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    Debug.Assert(ranges == null || ranges.Count > 0);

    int currentItem = 0;
    Pair<int, int> currentRange = new Pair<int, int>();
    int currentRangeIndex = -1;
    bool betweenRanges = false;
    if (ranges != null)
    {
        currentRange = ranges[0];
        currentRangeIndex = 0;
        betweenRanges = currentRange.First > 0;
    }

    using (IEnumerator<T> enumerator = source.GetEnumerator())
    {
        while (enumerator.MoveNext())
        {

            if (ranges != null)
            {
                if (betweenRanges)
                {
                    if (currentItem == currentRange.First)
                        betweenRanges = false;
                    else
                    {
                        currentItem++;
                        continue;
                    }
                }
            }

            yield return enumerator.Current;

            if (ranges != null)
            {
                if (currentItem == currentRange.Second)
                {
                    if (currentRangeIndex == ranges.Count - 1)
                        break; // We just visited the last item in the ranges

                    currentRangeIndex = currentRangeIndex + 1;
                    currentRange = ranges[currentRangeIndex];
                    betweenRanges = true;
                }
            }

            currentItem++;
        }
    }
}
于 2010-11-24T20:50:16.513 回答