4

我有一个应用程序来跟踪网站的页面访问。这是我的模型:

/// <summary>
/// One visit session on the tracked site. A session groups the page visits made during it.
/// </summary>
public class VisitSession {
    /// <summary>Identifier of this session.</summary>
    public string SessionId { get; set; }

    /// <summary>Time at which the session started; the daily summary index groups sessions by its date part.</summary>
    public DateTime StartTime { get; set; }

    /// <summary>GUID the tracker writes to a cookie on the visitor's first visit; identifies a returning visitor.</summary>
    public string UniqueVisitorId { get; set; }

    /// <summary>
    /// Read by the daily summary index (<c>s.IsNewVisit ? 1 : 0</c>) to count new visitors.
    /// NOTE(review): presumably true when this session is the visitor's first one - confirm against the tracker.
    /// </summary>
    public bool IsNewVisit { get; set; }

    /// <summary>Page visits that belong to this session.</summary>
    public IList<PageVisit> PageVisits { get; set; }
}

当访问者访问网站时,访问会话开始。一次访问会话有许多页面访问。当访问者第一次访问该网站时,跟踪器将写入一个 UniqueVisitorId (GUID) cookie。因此,我们能够知道访问者是否是回访者。

现在我想写一个视图来显示每天的 TotalVisitSessions、TotalPageVisits、TotalUniqueVisitors。所以我写了这个 Multi-Map/Reduce 索引:

/// <summary>
/// Multi-map/reduce index that produces one <c>VisitSummaryByDate</c> entry per date,
/// combining entries mapped from <c>VisitSession</c> and <c>PageVisit</c> documents.
/// </summary>
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
    public VisitSummaryByDateIndex()
    {
        // Map 1: every session contributes one visit session on its start date and
        // carries the visitor id so the reduce can count distinct visitors.
        AddMap<VisitSession>(sessions => from s in sessions
                                            select new VisitSummaryByDate
                                            {
                                                Date = s.StartTime.Date,
                                                TotalVisitSessions = 1,
                                                TotalPageVisits = 0,
                                                TotalNewVisitors = s.IsNewVisit ? 1 : 0,
                                                TotalUniqueVisitors = 0,
                                                UniqueVisitorId = s.UniqueVisitorId
                                            });

        // Map 2: every page visit contributes one page visit on its visit date.
        // It carries an empty visitor id, which the reduce filters out.
        AddMap<PageVisit>(visits => from v in visits
                                    select new VisitSummaryByDate
                                    {
                                        Date = v.VisitTime.Date,
                                        TotalVisitSessions = 0,
                                        TotalPageVisits = 1,
                                        TotalNewVisitors = 0,
                                        TotalUniqueVisitors = 0,
                                        UniqueVisitorId = String.Empty
                                    });

        // Reduce: group the mapped entries by date and sum the counters.
        Reduce = results => from result in results
                            group result by result.Date into g
                            select new VisitSummaryByDate
                            {
                                Date = g.Key,
                                TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
                                TotalPageVisits = g.Sum(it => it.TotalPageVisits),
                                TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
                                // Counts the distinct non-empty visitor ids among the entries in this group.
                                TotalUniqueVisitors = g.Select(it => it.UniqueVisitorId).Where(it => it.Length > 0).Distinct().Count(),
                                // NOTE(review): the output drops the visitor ids. If the reduce is applied again to
                                // its own output, the count above is recomputed from empty ids and the earlier
                                // value is lost - consistent with the too-small totals this index reports.
                                UniqueVisitorId = String.Empty
                            };
    }
}

问题出在“TotalUniqueVisitors”计算中,有时索引结果的TotalUniqueVisitors是1,有时是2。但是我查了数据,绝对不会这么少。我的 Map/Reduce 语法有问题吗?

相关文章: Raven DB:如何按日期创建“UniqueVisitorCount”索引

可以在此处找到带有示例数据的代码: https://gist.github.com/2702071

4

2 回答 2

2

正确的索引是:

/// <summary>
/// Multi-map/reduce index that produces one <c>VisitSummaryByDate</c> entry per date,
/// combining entries mapped from <c>VisitSession</c> and <c>PageVisit</c> documents.
/// This variant keeps a visitor id on the reduce output instead of clearing it.
/// </summary>
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
    public VisitSummaryByDateIndex()
    {
        // Map 1: every session contributes one visit session on its start date and
        // carries the visitor id so the reduce can count distinct visitors.
        AddMap<VisitSession>(sessions => from s in sessions
                                         select new VisitSummaryByDate
                                         {
                                             Date = s.StartTime.Date,
                                             TotalVisitSessions = 1,
                                             TotalPageVisits = 0,
                                             TotalNewVisitors = s.IsNewVisit ? 1 : 0,
                                             TotalUniqueVisitors = 0,
                                             UniqueVisitorId = s.UniqueVisitorId
                                         });

        // Map 2: every page visit contributes one page visit on its visit date.
        // It carries an empty visitor id, which the reduce filters out.
        AddMap<PageVisit>(visits => from v in visits
                                    select new VisitSummaryByDate
                                    {
                                        Date = v.VisitTime.Date,
                                        TotalVisitSessions = 0,
                                        TotalPageVisits = 1,
                                        TotalNewVisitors = 0,
                                        TotalUniqueVisitors = 0,
                                        UniqueVisitorId = string.Empty,
                                    });

        // Reduce: group the mapped entries by date and sum the counters.
        Reduce = results => from result in results
                            group result by result.Date into g
                            select new VisitSummaryByDate
                            {
                                Date = g.Key,
                                TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
                                TotalPageVisits = g.Sum(it => it.TotalPageVisits),
                                TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
                                // Counts the distinct non-empty visitor ids among the entries in this group.
                                TotalUniqueVisitors = g.Select(it => it.UniqueVisitorId).Where(x => x.Length > 0).Distinct().Count(),
                                // Carries forward the id of whichever entry comes first in the group.
                                // NOTE(review): only one id per group survives, so if the reduce is applied again
                                // to its own output the distinct count sees at most one id per earlier result,
                                // and the first entry may be a page-visit entry with an empty id - verify.
                                UniqueVisitorId = g.FirstOrDefault().UniqueVisitorId,
                            };
    }
}

与问题中的索引不同之处在于:这里在 Reduce 中设置了 UniqueVisitorId。我必须承认,我不能 100% 确定为什么需要这样做。

于 2012-05-15T14:45:13.080 回答
2

Reduce 实际上会对结果执行多次。您的索引却假定它只执行一次,并且每次都能访问整个结果集。

您的索引需要如下所示:

/// <summary>
/// Multi-map/reduce index that produces one <c>VisitSummaryByDate</c> entry per date,
/// combining entries mapped from <c>VisitSession</c> and <c>PageVisit</c> documents.
/// The set of visitor ids is carried on every entry (as an array) so that the reduce
/// yields the same result when it is applied again to its own output.
/// </summary>
public class VisitSummaryByDateIndex : AbstractMultiMapIndexCreationTask<VisitSummaryByDate>
{
    public VisitSummaryByDateIndex()
    {
        // Map 1: every session contributes one visit session on its start date and
        // a one-element set holding its visitor id.
        AddMap<VisitSession>(sessions => from s in sessions
                                         select new VisitSummaryByDate
                                         {
                                             Date = s.StartTime.Date,
                                             TotalVisitSessions = 1,
                                             TotalPageVisits = 0,
                                             TotalNewVisitors = s.IsNewVisit ? 1 : 0,
                                             TotalUniqueVisitors = 1,
                                             UniqueVisitorId = new[] { s.UniqueVisitorId }
                                         });

        // Map 2: every page visit contributes one page visit on its visit date and
        // an empty visitor-id set, so it never affects the unique-visitor count.
        AddMap<PageVisit>(visits => from v in visits
                                    select new VisitSummaryByDate
                                    {
                                        Date = v.VisitTime.Date,
                                        TotalVisitSessions = 0,
                                        TotalPageVisits = 1,
                                        TotalNewVisitors = 0,
                                        TotalUniqueVisitors = 0,
                                        UniqueVisitorId = new string[0]
                                    });

        // Reduce: group by date, sum the additive counters, and union the visitor-id sets.
        Reduce = results => from result in results
                            group result by result.Date into g
                            select new VisitSummaryByDate
                            {
                                Date = g.Key,
                                TotalVisitSessions = g.Sum(it => it.TotalVisitSessions),
                                TotalPageVisits = g.Sum(it => it.TotalPageVisits),
                                TotalNewVisitors = g.Sum(it => it.TotalNewVisitors),
                                // Unique visitors are not additive (summing would count a returning visitor once
                                // per session), so the total is derived from the flattened, de-duplicated id set.
                                TotalUniqueVisitors = g.SelectMany(it => it.UniqueVisitorId).Distinct().Count(),
                                // Flatten before Distinct so ids are compared, not the arrays that hold them.
                                UniqueVisitorId = g.SelectMany(it => it.UniqueVisitorId).Distinct().ToArray()
                            };
    }
}
于 2012-05-15T14:55:40.753 回答