更新 1,按照 Ayende 的回答

这是我第一次接触 RavenDb 并进行试验。我写了一个小的 map/reduce,但不幸的是结果是空的。

我在 RavenDb 中加载了大约 160 万份文档


/// <summary>
/// A single price tick: a timestamp plus ask/bid prices and their volumes.
/// Instances of this type are what <c>session.Query&lt;Tick&gt;()</c> returns.
/// </summary>
public class Tick
{
    /// <summary>Moment the tick was recorded; used as the filter field in the queries.</summary>
    public DateTime Time;

    /// <summary>Ask price.</summary>
    public decimal Ask;

    /// <summary>Bid price; this is the value the index aggregates.</summary>
    public decimal Bid;

    /// <summary>Volume on the ask side.</summary>
    public double AskVolume;

    /// <summary>Volume on the bid side.</summary>
    public double BidVolume;
}



// Load every tick recorded strictly after 2012-04-23 00:00 and strictly before 2012-04-24 00:00.
var rangeStart = new DateTime(2012, 4, 23);
var rangeEnd = new DateTime(2012, 4, 24, 0, 0, 0);
var ticks = session.Query<Tick>()
    .Where(x => x.Time > rangeStart && x.Time < rangeEnd)
    .ToList();

这给了我 90280 份文档,到目前为止一切都很好。


// Map step: projects every row into Max and Min (both taken from row.Bid),
// the row's Time, and a Count of 1.
// NOTE(review): the braces around the anonymous object are missing in this snippet.
Map = rows => from row in rows 
                          select new
                              Max = row.Bid,
                              Min = row.Bid, 
                              Time = row.Time,
                              Count = 1

// Reduce step: groups the mapped results and aggregates each group.
// NOTE(review): this reads result.MaxBid, but the map above emits Max/Min, not MaxBid.
// NOTE(review): the group key contains only MaxBid and Count, yet g.Key.Time is read below.
// NOTE(review): grouping by Count and then summing Count looks unintended — confirm the desired key.
Reduce = results => from result in results
                                group result by new{ result.MaxBid, result.Count} into g
                                select new
                                    Max = g.Key.MaxBid,
                                    Min = g.Min(x => x.MaxBid),
                                    Time = g.Key.Time,
                                    Count = g.Sum(x => x.Count)



/// <summary>
/// Result shape requested from the aggregation index via
/// <c>session.Query&lt;TickAggregationResult&gt;(...)</c>.
/// </summary>
private class TickAggregationResult
{
    /// <summary>Highest bid in the aggregated group.</summary>
    public decimal MaxBid { get; set; }

    /// <summary>Lowest bid in the aggregated group.</summary>
    public decimal MinBid { get; set; }

    /// <summary>Number of ticks in the aggregated group.</summary>
    public int Count { get; set; }
}



// Register every index class found in the assembly that declares TickAggregation.
Raven.Client.Indexes.IndexCreation.CreateIndexes(typeof(TickAggregation).Assembly, documentStore);

var session = documentStore.OpenSession();

// Query the index by its name, asking for the aggregated result shape.
var g1 = session.Query<TickAggregationResult>(typeof(TickAggregation).Name);

// Query the same index, restricted to ticks strictly inside 2012-04-23.
// Customize is applied directly on the Raven queryable returned by Query<>(),
// so the call does not depend on which Where overload gets picked.
var group = session.Query<Tick, TickAggregation>()
                   .Customize(x => x.WaitForNonStaleResults())
                   .Where(x => x.Time > new DateTime(2012, 4, 23) &&
                               x.Time < new DateTime(2012, 4, 24, 00, 0, 0));



现在我得到一个错误:


让我解释一下我要在纯 sql 中完成的工作:

-- Lowest ask price and number of ticks inside the given time range.
select min(Ask), count(*) as TickCount from Ticks
where Time between '2012-04-23' and '2012-04-24'

不幸的是,Map/Reduce 不能那样工作。好吧,至少它的 Reduce 部分不能。为了对您的数据集进行 reduce,您必须预先定义用于分组的特定时间范围,例如每天、每周、每月等。如果按天进行 reduce,您就可以得到每天的最小值/最大值/计数。




using System;
using System.Linq;
using Raven.Client.Document;
using Raven.Client.Indexes;
using Raven.Client.Linq;

namespace ConsoleApplication1
{
  /// <summary>
  /// A single price tick document: an id, a timestamp and the bid price.
  /// </summary>
  public class Tick
  {
    public string Id { get; set; }
    public DateTime Time { get; set; }
    public decimal Bid { get; set; }
  }

  /// <summary>
  /// This index is a true map/reduce, but its totals are for all time.
  /// You can't filter it by time range.
  /// </summary>
  class Ticks_Aggregate : AbstractIndexCreationTask<Tick, Ticks_Aggregate.Result>
  {
    /// <summary>
    /// Shape shared by the map output and the reduce output.
    /// </summary>
    public class Result
    {
      public decimal Min { get; set; }
      public decimal Max { get; set; }
      public int Count { get; set; }
    }

    public Ticks_Aggregate()
    {
      // Every tick starts out as its own min and max, with a count of one.
      Map = ticks => from tick in ticks
                     select new
                     {
                       Min = tick.Bid,
                       Max = tick.Bid,
                       Count = 1
                     };

      // Grouping by the constant 0 folds all entries into a single group.
      Reduce = results => from result in results
                          group result by 0
                          into g
                          select new
                          {
                            Min = g.Min(x => x.Min),
                            Max = g.Max(x => x.Max),
                            Count = g.Sum(x => x.Count)
                          };
    }
  }

  /// <summary>
  /// This index can be filtered by time range, but it does not reduce anything
  /// so it will not be performant if there are many items inside the filter.
  /// </summary>
  class Ticks_ByTime : AbstractIndexCreationTask<Tick>
  {
    /// <summary>
    /// Shape produced by the TransformResults projection.
    /// </summary>
    public class Result
    {
      public decimal Min { get; set; }
      public decimal Max { get; set; }
      public int Count { get; set; }
    }

    public Ticks_ByTime()
    {
      // Only Time is indexed; it is the field the query filters on.
      Map = ticks => from tick in ticks
                     select new { tick.Time };

      // Aggregates whatever ticks the query matched into one result row.
      TransformResults = (database, ticks) =>
                         from tick in ticks
                         group tick by 0
                         into g
                         select new
                         {
                           Min = g.Min(x => x.Bid),
                           Max = g.Max(x => x.Bid),
                           Count = g.Count()
                         };
    }
  }

  /// <summary>
  /// Console demo: stores random ticks, then prints them and their aggregates.
  /// </summary>
  class Program
  {
    private static void Main()
    {
      using (var documentStore = new DocumentStore { Url = "http://localhost:8080" })
      {
        // The store must be initialized before indexes are created or sessions opened.
        documentStore.Initialize();
        IndexCreation.CreateIndexes(typeof(Program).Assembly, documentStore);

        var today = DateTime.Today;
        var rnd = new Random();

        using (var session = documentStore.OpenSession())
        {
          // Generate 100 random ticks
          for (var i = 0; i < 100; i++)
          {
            var tick = new Tick { Time = today.AddMinutes(i), Bid = rnd.Next(100, 1000) / 100m };
            session.Store(tick);
          }

          // Nothing is sent to the server until SaveChanges is called.
          session.SaveChanges();
        }

        using (var session = documentStore.OpenSession())
        {
          // Query items with a filter.  This will create a dynamic index.
          // The ticks were written just above, so wait until indexing has caught up.
          var fromTime = today.AddMinutes(20);
          var toTime = today.AddMinutes(80);
          var ticks = session.Query<Tick>()
            .Customize(x => x.WaitForNonStaleResults())
            .Where(x => x.Time >= fromTime && x.Time <= toTime)
            .OrderBy(x => x.Time);

          // Output the results of the above query
          foreach (var tick in ticks)
          {
            Console.WriteLine("{0} {1}", tick.Time, tick.Bid);
          }

          // Get the aggregates for all time
          var total = session.Query<Tick, Ticks_Aggregate>()
            .Customize(x => x.WaitForNonStaleResults())
            .As<Ticks_Aggregate.Result>()
            .Single();
          Console.WriteLine("Min: {0}", total.Min);
          Console.WriteLine("Max: {0}", total.Max);
          Console.WriteLine("Count: {0}", total.Count);

          // Get the aggregates with a filter
          var filtered = session.Query<Tick, Ticks_ByTime>()
            .Customize(x => x.WaitForNonStaleResults())
            .Where(x => x.Time >= fromTime && x.Time <= toTime)
            .As<Ticks_ByTime.Result>()
            .Take(1024)  // max you can take at once
            .ToList()    // required!
            .Single();
          Console.WriteLine("Min: {0}", filtered.Min);
          Console.WriteLine("Max: {0}", filtered.Max);
          Console.WriteLine("Count: {0}", filtered.Count);
        }
      }
    }
  }
}


我可以设想一个方案,来解决在可能很大的时间范围上按过滤条件进行聚合的问题。reduce 必须把数据分解为不同级别、越来越小的时间单位。这样的代码有点复杂,但我正在为自己的需要编写它。完成后,我将把它发布到 www.ravendb.net 的知识库中。



  1. 您必须在调用 Single() 之前先执行 ToList(),才能获得完整的结果集。
  2. 即使这是在服务器上运行的,结果范围内最多也只能有 1024 条,并且您必须显式指定 Take(1024),否则只能得到默认的最多 128 条。由于它在服务器上运行,我没想到会这样。但我猜这是因为您通常不会在 TransformResults 部分进行聚合。


