我有一个如下所示的原始聚合脚本,但很难将其转换为 elasticsearch dsl。
我已经阅读了文档,发现描述说我们可以使用 .bucket()、.metric() 和 .pipeline() 方法来嵌套聚合,但缺乏进一步说明如何将这三个方法用于更复杂聚合的信息,喜欢更多的层次。
{
"aggs": {
"statistics": {
"terms": {
"field":"id"
},
"aggs":{
"date":{
"date_histogram":{
"min_doc_count":0,
"field":"date",
"interval":"1d",
"format":"yyyy-MM-dd"
},
"aggs":{
"column_a":{
"avg":{
"field":"column_a"
}
},
"column_b":{
"avg":{
"field":"column_b"
}
},
"column_c":{
"avg":{
"field":"column_c"
}
},
"a_gap":{
"serial_diff":{
"buckets_path":"column_a"
}
},
"b_gap":{
"serial_diff":{
"buckets_path":"column_b"
}
},
"c_gap":{
"serial_diff":{
"buckets_path":"column_c"
}
}
}
},
"sum_a_gap":{
"sum_bucket":{
"buckets_path":"date>a_gap"
}
},
"sum_b_gap":{
"sum_bucket":{
"buckets_path":"date>b_gap"
}
},
"sum_c_gap":{
"sum_bucket":{
"buckets_path":"date>c_gap"
}
}
}
}
}
}
我这样的 Elasticsearch-dsl 查询使“sum_a_gap”与“column_a”和“a_gap”处于同一级别。
self._search.aggs
.bucket('statistics', 'terms', field='id')
.bucket('date', 'date_histogram', field='date',
interval='1d', min_doc_count=0, format='yyyy-MM-dd')
.metric('column_a', 'avg', field='column_a')
.metric('column_b', 'avg', field='column_b')
.metric('column_c', 'avg', field='column_c')
.pipeline('a_gap', 'serial_diff', buckets_path='column_a')
.pipeline('b_gap', 'serial_diff', buckets_path='column_b')
.pipeline('c_gap', 'serial_diff', buckets_path='column_c')
.pipeline('sum_a_gap', 'sum_bucket', buckets_path='date>a_gap')
.pipeline('sum_b_gap', 'sum_bucket', buckets_path='date>b_gap')
.pipeline('sum_c_gap', 'sum_bucket', buckets_path='date>c_gap')
提前谢谢了!