我想用 Elastic Search/Kibana 分析我的日志数据并按月计算唯一客户。当我使用日期直方图聚合和日期范围聚合时,结果会有所不同。
这是日期直方图查询:
"query": {
"query_string": {
"query": "_type:logs AND created_at:[2015-04-01 TO now]",
"analyze_wildcard": true
}
},
"size": 0,
"aggs": {
"2": {
"date_histogram": {
"field": "created_at",
"interval": "1M",
"min_doc_count": 1
},
"aggs": {
"1": {
"cardinality": {
"field": "customer.id"
}
}
}
}
}
结果:
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": 595805
},
"key_as_string": "2015-04-01T00:00:00.000Z",
"key": 1427839200000,
"doc_count": 6410438
},
{
"1": {
"value": 647788
},
"key_as_string": "2015-05-01T00:00:00.000Z",
"key": 1430431200000,
"doc_count": 6669555
},...
这是日期范围查询:
"query": {
"query_string": {
"query": "_type:logs AND created_at:[2015-04-01 TO now]",
"analyze_wildcard": true
}
},
"size": 0,
"aggs": {
"2": {
"date_range": {
"field": "created_at",
"ranges": [
{
"from": "2015-04-01",
"to": "2015-05-01"
},
{
"from": "2015-05-01",
"to": "2015-06-01"
}
]
},
"aggs": {
"1": {
"cardinality": {
"field": "customer.id"
}
}
}
}
}
和回应:
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": 592179
},
"key": "2015-04-01T00:00:00.000Z-2015-05-01T00:00:00.000Z",
"from": 1427846400000,
"from_as_string": "2015-04-01T00:00:00.000Z",
"to": 1430438400000,
"to_as_string": "2015-05-01T00:00:00.000Z",
"doc_count": 6411884
},
{
"1": {
"value": 616995
},
"key": "2015-05-01T00:00:00.000Z-2015-06-01T00:00:00.000Z",
"from": 1430438400000,
"from_as_string": "2015-05-01T00:00:00.000Z",
"to": 1433116800000,
"to_as_string": "2015-06-01T00:00:00.000Z",
"doc_count": 6668060
}
]
}
}
在第一种情况下,我有 4 月的 595,805 和 5 月的 647,788 在第二种情况下,我有 4 月的 592,179 和 5 月的 616,995
有人可以解释为什么我在这些用例之间有这些差异?
谢谢
我更新了我的第一篇文章以添加另一个示例
我添加了另一个数据较少的示例(在 1 天),但存在相同的问题。这是带有日期直方图的第一个请求:
{
"size": 0,
"query": {
"query_string": {
"query": "_type:logs AND logs.created_at:[2015-04-01 TO 2015-04-01]",
"analyze_wildcard": true
}
},
"aggs": {
"2": {
"date_histogram": {
"field": "created_at",
"interval": "1h",
"pre_zone": "00:00",
"pre_zone_adjust_large_interval": true,
"min_doc_count": 1
},
"aggs": {
"1": {
"cardinality": {
"field": "customer.id"
}
}
}
}
}
}
我们可以看到第一个小时有 660 个唯一计数和 1717 个文档计数:
{
"hits":{
"total":203961,
"max_score":0,
"hits":[
]
},
"aggregations":{
"2":{
"buckets":[
{
"1":{
"value":660
},
"key_as_string":"2015-04-01T00:00:00.000Z",
"key":1427846400000,
"doc_count":1717
},
{
"1":{
"value":324
},
"key_as_string":"2015-04-01T01:00:00.000Z",
"key":1427850000000,
"doc_count":776
},
{
"1":{
"value":190
},
"key_as_string":"2015-04-01T02:00:00.000Z",
"key":1427853600000,
"doc_count":481
}
]
}
}
}
但是在日期范围的第二个请求中:
{
"size": 0,
"query": {
"query_string": {
"query": "_type:logs AND logs.created_at:[2015-04-01 TO 2015-04-01]",
"analyze_wildcard": true
}
},
"aggs": {
"2": {
"date_range": {
"field": "created_at",
"ranges": [
{
"from": "2015-04-01T00:00:00",
"to": "2015-04-01T01:00:00"
},
{
"from": "2015-04-01T01:00:00",
"to": "2015-04-01T02:00:00"
}
]
},
"aggs": {
"1": {
"cardinality": {
"field": "customer.id"
}
}
}
}
}
}
我们只能看到 633 个唯一计数和 1717 个文档计数:
{
"hits":{
"total":203961,
"max_score":0,
"hits":[
]
},
"aggregations":{
"2":{
"buckets":[
{
"1":{
"value":633
},
"key":"2015-04-01T00:00:00.000Z-2015-04-01T01:00:00.000Z",
"from":1427846400000,
"from_as_string":"2015-04-01T00:00:00.000Z",
"to":1427850000000,
"to_as_string":"2015-04-01T01:00:00.000Z",
"doc_count":1717
},
{
"1":{
"value":328
},
"key":"2015-04-01T01:00:00.000Z-2015-04-01T02:00:00.000Z",
"from":1427850000000,
"from_as_string":"2015-04-01T01:00:00.000Z",
"to":1427853600000,
"to_as_string":"2015-04-01T02:00:00.000Z",
"doc_count":776
}
]
}
}
}
请有人能告诉我为什么?谢谢