2

RavenDB 中大约有 50 万个客户文档,其中一个属性是“City”(城市)。我该如何编写一个 LINQ 查询,列出每个城市及其出现的次数?例如,如果有一千个客户文档的城市值为“NY”,那么我需要得到一个带计数的城市列表,如:NY 1000、LA 200、OR 1300、BO 5000 等。

这是我最初写的..

 // Asker's original attempt: materializes the Customer query into memory with
 // ToList(), then groups by City on the client and builds a city -> count map.
 // NOTE(review): the counts only cover the documents the query actually
 // returned; if the server caps or pages the result set, the totals will be
 // short of the real document count — confirm against the RavenDB paging limits.
 // NOTE(review): Dictionary<TKey,TValue> does not guarantee enumeration order,
 // so the OrderBy below has no reliable effect on the final dictionary.
 Dictionary<string,int> cityStats = session.Query<Customer>()
                    .ToList()
                    .GroupBy(x => x.City)
                    .OrderBy(x => x.Count())
                    .ToDictionary(x => x.Key, x => x.Count());

但这似乎没有给出准确的结果。于是我修改了允许的最大请求数属性(我知道不推荐这样做),只是想看看结果是否会改变;但即使把 maxrequest 的值设为 500000,得到的结果也完全一样。我确定大约有 50 万份客户文档,所以各城市的计数加起来应该与这个总数相符。

4

1 回答 1

2

You need a map-reduce index to do this. Here's a short console program that demonstrates:

using System;
using System.Linq;
using Raven.Client.Document;
using Raven.Client.Indexes;

namespace ConsoleApplication1
{
  /// <summary>
  /// Customer document used by the demo; the per-city counts are computed
  /// from its <see cref="City"/> property.
  /// </summary>
  public class Customer
  {
    /// <summary>
    /// Identifier of the customer document.
    /// NOTE(review): the sample never sets this before storing; presumably
    /// the RavenDB client assigns it — confirm.
    /// </summary>
    public string Id
    {
      get;
      set;
    }

    /// <summary>Customer's name.</summary>
    public string Name
    {
      get;
      set;
    }

    /// <summary>City value the customers are grouped and counted by.</summary>
    public string City
    {
      get;
      set;
    }
  }

  /// <summary>
  /// Map-reduce index definition that produces one <see cref="Result"/> per
  /// distinct city, carrying the number of customers in that city.
  /// </summary>
  public class Customers_ByCity : AbstractIndexCreationTask<Customer, Customers_ByCity.Result>
  {
    /// <summary>
    /// Shape of a single reduced entry: a city and how many customers have it.
    /// </summary>
    public class Result
    {
      public string City { get; set; }
      public int Count { get; set; }
    }

    /// <summary>
    /// Defines the map and reduce steps of the index.
    /// </summary>
    public Customers_ByCity()
    {
      // Map: every customer contributes one entry with a count of 1 for its city.
      Map = docs =>
        from doc in docs
        select new
        {
          City = doc.City,
          Count = 1
        };

      // Reduce: entries sharing the same city are merged by summing their counts.
      Reduce = entries =>
        from entry in entries
        group entry by entry.City into cityGroup
        select new
        {
          City = cityGroup.Key,
          Count = cityGroup.Sum(e => e.Count)
        };
    }
  }

  class Program
  {
    /// <summary>
    /// Demo entry point: connects to a local RavenDB server, registers the
    /// indexes found in this assembly, stores a handful of sample customers,
    /// then queries the <see cref="Customers_ByCity"/> index and prints each
    /// city with its customer count.
    /// </summary>
    private static void Main()
    {
      // The document store is disposable; the using block makes sure it is
      // released once the demo is done instead of being left open until
      // process exit.
      using (var documentStore = new DocumentStore { Url = "http://localhost:8080" })
      {
        documentStore.Initialize();
        IndexCreation.CreateIndexes(typeof(Program).Assembly, documentStore);

        // Seed data: three customers in NY and four in LA.
        using (var session = documentStore.OpenSession())
        {
          session.Store(new Customer { Name = "John", City = "NY" });
          session.Store(new Customer { Name = "Jane", City = "NY" });
          session.Store(new Customer { Name = "Jim", City = "NY" });
          session.Store(new Customer { Name = "Sally", City = "LA" });
          session.Store(new Customer { Name = "Sam", City = "LA" });
          session.Store(new Customer { Name = "Suzie", City = "LA" });
          session.Store(new Customer { Name = "Sarah", City = "LA" });

          session.SaveChanges();
        }

        using (var session = documentStore.OpenSession())
        {
          // In a real app, you probably don't want to wait for nonstale results.
          // You will also want to consider what to do if you have more than one page of results (more than 1024 cities)

          var counts = session.Query<Customers_ByCity.Result, Customers_ByCity>()
            .Customize(x => x.WaitForNonStaleResults())
            .Take(1024);

          // Enumerating the query is what actually executes it against the index.
          foreach (var result in counts)
          {
            Console.WriteLine("{0}: {1}", result.City, result.Count);
          }

          Console.WriteLine();
        }
      }

      // Keep the console window open until the user presses Enter.
      Console.ReadLine();
    }
  }
}
于 2012-09-26T18:43:38.983 回答