例如,如果我们有书籍、作者和日期信息的数据。我们可以为每个月为作者提供多少本书建立一个交叉过滤器吗?
问问题
8855 次
3 回答
25
用伪 sql 术语来说,您要做的是:
SELECT COUNT(book)
GROUP BY author, month
我处理这类问题的方法是将字段“组合”到一个维度中。因此,在您的情况下,我会将月份和作者信息连接在一起,形成一个维度。
让这成为我们的测试数据:
var cf = crossfilter([
{ date:"1 jan 2014", author: "Mr X", book: "Book 1" },
{ date:"2 jan 2014", author: "Mr X", book: "Book 2" },
{ date:"3 feb 2014", author: "Mr X", book: "Book 3" },
{ date:"1 mar 2014", author: "Mr X", book: "Book 4" },
{ date:"2 apr 2014", author: "Mr X", book: "Book 5" },
{ date:"3 apr 2014", author: "Mr X", book: "Book 6"},
{ date:"1 jan 2014", author: "Ms Y", book: "Book 7" },
{ date:"2 jan 2014", author: "Ms Y", book: "Book 8" },
{ date:"3 jan 2014", author: "Ms Y", book: "Book 9" },
{ date:"1 mar 2014", author: "Ms Y", book: "Book 10" },
{ date:"2 mar 2014", author: "Ms Y", book: "Book 11" },
{ date:"3 mar 2014", author: "Ms Y", book: "Book 12" },
{ date:"4 apr 2014", author: "Ms Y", book: "Book 13" }
]);
维度定义如下:
var dimensionMonthAuthor = cf.dimension(function (d) {
var thisDate = new Date(d.date);
return 'month='+thisDate.getMonth()+';author='+d.author;
});
现在我们可以简单地做一个减少计数来计算每个作者每月有多少本书(即每个维度单位):
var monthAuthorCount = dimensionMonthAuthor.group().reduceCount(function (d) { return d.book; }).all();
结果如下:
{"key":"month=0;author=Mr X","value":2}
{"key":"month=0;author=Ms Y","value":3}
{"key":"month=1;author=Mr X","value":1}
{"key":"month=2;author=Mr X","value":1}
{"key":"month=2;author=Ms Y","value":3}
{"key":"month=3;author=Mr X","value":2}
{"key":"month=3;author=Ms Y","value":1}
于 2013-11-26T15:04:04.247 回答
8
我没有发现接受的答案很有帮助。
我改用以下内容。
我首先创建了一个键控组(在您的情况下为月份)
var authors = cf.dimension(function (d) {
return +d['month'];
})
接下来,我在键控数据集上使用 map reduce 方法来计算平均值
分组辅助函数:
var monthsAvg = authors.group().reduce(reduceAddbooks, reduceRemovebooks, reduceInitialbooks).all();
map-reduce 函数:
function reduceAddbooks(p, v) {
p.author = v['author'];
p.books = +v['books'];
return p;
}
function reduceRemovebooks(p, v) {
p.author = v['author'];
p.books = +v['books'];
return p;
}
function reduceInitialbooks() {
return {
author:0,
books:0
};
}
于 2015-01-18T21:08:51.857 回答
5
我想使用以下描述的新工作来更新旧答案:https ://github.com/dc-js/dc.js/pull/91
此性能尚未在大型数据集上进行测试
var cf = crossfilter([
{ date:"1 jan 2014", author: "Mr X", book: "Book 1" },
{ date:"2 jan 2014", author: "Mr X", book: "Book 2" },
{ date:"3 feb 2014", author: "Mr X", book: "Book 3" },
{ date:"1 mar 2014", author: "Mr X", book: "Book 4" },
{ date:"2 apr 2014", author: "Mr X", book: "Book 5" },
{ date:"3 apr 2014", author: "Mr X", book: "Book 6"},
{ date:"1 jan 2014", author: "Ms Y", book: "Book 7" },
{ date:"2 jan 2014", author: "Ms Y", book: "Book 8" },
{ date:"3 jan 2014", author: "Ms Y", book: "Book 9" },
{ date:"1 mar 2014", author: "Ms Y", book: "Book 10" },
{ date:"2 mar 2014", author: "Ms Y", book: "Book 11" },
{ date:"3 mar 2014", author: "Ms Y", book: "Book 12" },
{ date:"4 apr 2014", author: "Ms Y", book: "Book 13" }
]);
var dimensionMonthAuthor = cf.dimension(function (d) {
var thisDate = new Date(d.date);
//stringify() and later, parse() to get keyed objects
return JSON.stringify ( { date: thisDate.getMonth() , author: d.author } ) ;
});
group = dimensionMonthAuthor.group();
//this forEach method could be very expensive on write.
group.all().forEach(function(d) {
//parse the json string created above
d.key = JSON.parse(d.key);
});
return group.all()
结果是:
[ { key: { date: 0, author: 'Mr X' },
value: 2 },
{ key: { date: 0, author: 'Ms Y' },
value: 3 },
{ key: { date: 1, author: 'Mr X' },
value: 1 },
{ key: { date: 2, author: 'Mr X' },
value: 1 },
{ key: { date: 2, author: 'Ms Y' },
value: 3 },
{ key: { date: 3, author: 'Mr X' },
value: 2 },
{ key: { date: 3, author: 'Ms Y' },
value: 1 } ]
于 2016-03-21T16:22:49.937 回答