我在 python 网络应用程序中使用 elasticsearch 来查询新闻文档。数据库中实际上有 100000 个文档。
原始数据库是 mongo 数据库,elasticsearch 通过 mongoriver 插件插入。
问题是该函数需要大约 850 毫秒才能返回结果。我想尽可能减少这个数字。
这是我用来查询数据库的python代码(限制通常是16):
def search_news(term, limit, page, flagged_articles):
query = {
"query": {
"from": page*limit,
"size": limit,
"multi_match" : {
"query" : term,
"fields" : [ "title^3" , "category^5" , "entities.name^5", "art_text^1", "summary^1"]
}
},
"filter" : {
"not" : {
"filter" : {
"ids" : {
"values" : flagged_articles
}
},
"_cache" : True
}
}
}
es_query = json_util.dumps(query)
uri = 'http://localhost:9200/newsidx/_search'
r = requests.get(uri, data=es_query)
results = json.loads( r.text )
data = []
for res in results['hits']['hits']:
data.append(res['_source'])
return data
这是索引映射:
{
"news": {
"properties": {
"actual_rank": {
"type": "long"
},
"added": {
"type": "date",
"format": "dateOptionalTime"
},
"api_id": {
"type": "long"
},
"art_text": {
"type": "string"
},
"category": {
"type": "string"
},
"downvotes": {
"type": "long"
},
"entities": {
"properties": {
"etype": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"flags": {
"properties": {
"a": {
"type": "long"
},
"b": {
"type": "long"
},
"bad_image": {
"type": "long"
},
"c": {
"type": "long"
},
"d": {
"type": "long"
},
"innapropiate": {
"type": "long"
},
"irrelevant_info": {
"type": "long"
},
"miscategorized": {
"type": "long"
}
}
},
"media": {
"type": "string"
},
"published": {
"type": "string"
},
"published_date": {
"type": "date",
"format": "dateOptionalTime"
},
"show": {
"type": "boolean"
},
"source": {
"type": "string"
},
"source_rank": {
"type": "double"
},
"summary": {
"type": "string"
},
"times_showed": {
"type": "long"
},
"title": {
"type": "string"
},
"top_entities": {
"properties": {
"einfo_test": {
"type": "string"
},
"etype": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"tweet_article_poster": {
"type": "string"
},
"tweet_favourites": {
"type": "long"
},
"tweet_retweets": {
"type": "long"
},
"tweet_user_rank": {
"type": "double"
},
"upvotes": {
"type": "long"
},
"url": {
"type": "string"
}
}
}
}
编辑:给定龙卷风服务器信息输出,在服务器上测量响应时间。