我们有一个很大的数据库,里面收集了大量时事通讯,我想制作一个趋势页面。目标是让页面既实时又快速!我们希望展示过去 2 小时、4 小时、24 小时、过去一周和过去一个月的热门时事通讯。
我使用 MongoDB 已经有一段时间了,并且想尽量保持简单。我想要一个新的集合 trending,按时间段(bucket)记录时事通讯页面的访问量。每次有访问时,我都想把该时事通讯的信息写入保存对应时间段热门时事通讯的文档中,并用 $inc 递增其 hits 字段。
我的对象是:
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b39"),
"bucket" : "last2hours",
"language" : "nl",
"time" : "2019-08-08_00",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3b"),
"bucket" : "last2hours",
"language" : "nl",
"time" : "2019-08-08_01",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3d"),
"bucket" : "last4hours",
"language" : "nl",
"time" : "2019-08-08_00",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
这里的目标是只需按 bucket、language 和 time 三个字段查询。所以,如果我想查看最近 2 小时的热门时事通讯,只要查询 {bucket: 'last2hours', language: 'nl', time: '2019-08-08_00'},就能拿到所需的全部信息,不需要聚合。这个 findOne 查询很快。
所以我做了一个方法来更新趋势集合:
/**
 * Record one hit for a newsletter in every trending slot document it affects.
 *
 * For each window (last 2 hours, 4 hours, day, week, month) the hit is written
 * into the current time slot and into the following `steps - 1` slots, so each
 * slot document accumulates the hits of the slots leading up to it and can be
 * read back with a single lookup on bucket + time + language.
 *
 * All slot documents are upserted in one ordered bulk write instead of one
 * round trip per slot.
 *
 * @param array|\ArrayAccess $newsletter Newsletter record; the keys read are
 *        `_id`, `_slug`, `subject`, `date` and `publisher` (`_id`, `_slug`,
 *        `name`, `taal`).
 *
 * @return void
 */
public function setNewsletterTrendingStatistics($newsletter){
    // Window definitions: number of slots, slot size, and the date() format
    // used as the slot key.
    $trend_buckets = array(
        'last2hours' => array('steps' => 2, 'step'=>'hour', 'format'=> 'Y-m-d_H'),
        'last4hours' => array('steps' => 4, 'step'=>'hour', 'format'=> 'Y-m-d_H' ),
        'last1day' => array('steps' => 24, 'step'=>'hour', 'format'=> 'Y-m-d_H' ),
        'lastweek' => array('steps' => 7, 'step'=>'day', 'format'=> 'Y-m-d' ),
        'lastmonth' => array('steps' => 31, 'step'=>'day', 'format'=> 'Y-m-d' ),
    );

    // Resolve "now" once: every slot key is derived from the same instant, so
    // a hit processed across an hour/day boundary cannot land in slots that
    // were computed from two different clock readings.
    $now = time();
    $language = $newsletter['publisher']['taal'];

    // The update document is identical for every slot, so build it once.
    $field = 'newsletters.' . (string) $newsletter['_id'];
    $update = array(
        '$set' => array(
            $field . '._id' => $newsletter['_id'],
            $field . '._slug' => $newsletter['_slug'],
            $field . '.subject' => $newsletter['subject'],
            $field . '.date' => $newsletter['date'],
            $field . '.publisher' => array(
                '_id' => $newsletter['publisher']['_id'],
                '_slug' => $newsletter['publisher']['_slug'],
                'name' => $newsletter['publisher']['name'],
                'taal' => $newsletter['publisher']['taal'],
            ),
        ),
        '$inc' => array(
            $field . '.hits' => 1,
        ),
    );

    // One upsert per (bucket, slot, language), collected for a single bulk write.
    $operations = array();
    foreach ($trend_buckets as $bucket => $settings) {
        for ($i = 0; $i < $settings['steps']; $i++) {
            $filter = array(
                'bucket' => $bucket,
                'time' => date($settings['format'], strtotime('+' . $i . ' ' . $settings['step'], $now)),
                'language' => $language,
            );
            $operations[] = array(
                'updateOne' => array($filter, $update, array('upsert' => true)),
            );
        }
    }

    // Default ordered execution: operations run in sequence and stop at the
    // first failure, like the sequential updateOne() calls this replaces.
    $this->FW->mdb->{$this->config['collections']['newsletters_trending']}->bulkWrite($operations);
}
首先,这是一个好方法吗?有没有更好的做法?其次,我想统计唯一访问(unique hits),因此需要保存访问者的 IP 地址。我希望在 update 操作中就完成唯一访问的计数,这样就不必依赖额外的 findOne 查询。实现这一目标的最佳方法是什么?我知道可以用 $addToSet 保存 IP 地址,但之后还需要统计这些不重复 IP 地址的数量。