首先,我们需要确保 ES 中保存的关系是唯一的。为此,可以不使用任意生成的 id,而改用由 user_id_1、relation 和 user_id_2 拼接而成的 id。我们还需要确保 user_id 字段的分析器不会把一个 id 拆成多个词元(token):如果 id 是字符串,就必须以 not_analyzed 方式索引。在满足这两个条件的情况下,我们只需在用 relation:friend 过滤后的结果集上,对字段 user_id_2 执行 terms facet 查询。该查询会返回按索引中出现次数排序的、排名靠前的 user_id_2 值。总而言之,它可能看起来像这样:
# Demo: rank user_id_2 values by how many "friend" relations reference them.
# Assumes an Elasticsearch node is reachable at localhost:9200.
es=http://localhost:9200

# Index definition: all three fields are not_analyzed, _source is disabled.
relationships_mapping='{
"mappings" : {
"relation" : {
"_source" : {"enabled" : false },
"properties" : {
"user_id_1": { "type": "string", "index" : "not_analyzed"},
"relation": { "type": "string", "index" : "not_analyzed"},
"user_id_2": { "type": "string", "index" : "not_analyzed"}
}
}
}
}'
curl -XPUT "${es}/relationships" -d "${relationships_mapping}"

# Sample data as "<user_id_1>:<user_id_2>" pairs. The document id is built
# from user_id_1, the relation name and user_id_2.
for pair in 2001:1002 2002:1002 2002:1001 2003:1003; do
  from=${pair%%:*}
  to=${pair##*:}
  curl -XPUT "${es}/relationships/relation/${from}-friend-${to}" \
    -d "{\"user_id_1\": \"${from}\", \"relation\":\"friend\", \"user_id_2\": \"${to}\"}"
done

# Refresh the index before searching it.
curl -XPOST "${es}/relationships/_refresh"
echo

# Restrict to relation == "friend", then run a terms facet on user_id_2.
friend_facet_query='{
"query": {
"term" : {
"relation" : "friend"
}
},
"facets" : {
"popular" : {
"terms" : {
"field" : "user_id_2"
}
}
}
}'
curl -XGET "${es}/relationships/relation/_search?pretty=true&search_type=count" \
  -d "${friend_facet_query}"
请注意,由于 facet 计算是分布式进行的,如果索引使用了多个分片,facet 查询报告的计数可能低于实际记录数。请参阅 Elasticsearch issue #1832。
编辑:
编辑后的问题有两种解决方案。一种解决方案是在两个字段上使用 facet:
# Demo: facet over two fields (user_id and user_id_2) across the
# "relationships" and "users" indices in a single search request.
# Assumes an Elasticsearch node is reachable at localhost:9200.
es=http://localhost:9200

# "relationships" index: all three fields are not_analyzed, _source disabled.
relationships_mapping='{
"mappings" : {
"relation" : {
"_source" : {"enabled" : false },
"properties" : {
"user_id_1": { "type": "string", "index" : "not_analyzed"},
"relation": { "type": "string", "index" : "not_analyzed"},
"user_id_2": { "type": "string", "index" : "not_analyzed"}
}
}
}
}'
curl -XPUT "${es}/relationships" -d "${relationships_mapping}"

# "users" index: a single not_analyzed user_id field, _source disabled.
users_mapping='{
"mappings" : {
"user" : {
"_source" : {"enabled" : false },
"properties" : {
"user_id": { "type": "string", "index" : "not_analyzed"}
}
}
}
}'
curl -XPUT "${es}/users" -d "${users_mapping}"

# One document per user; the document id equals the user id.
for uid in 1001 1002 1003 1004 1005; do
  curl -XPUT "${es}/users/user/${uid}" -d "{\"user_id\": ${uid}}"
done

# Sample relations as "<user_id_1>:<user_id_2>" pairs. The document id is
# built from user_id_1, the relation name and user_id_2.
for pair in 2001:1002 2002:1002 2002:1001 2003:1003; do
  from=${pair%%:*}
  to=${pair##*:}
  curl -XPUT "${es}/relationships/relation/${from}-friend-${to}" \
    -d "{\"user_id_1\": \"${from}\", \"relation\":\"friend\", \"user_id_2\": \"${to}\"}"
done

# Refresh both indices before searching them.
for index in relationships users; do
  curl -XPOST "${es}/${index}/_refresh"
done
echo

# The "indices" query applies the friend-relation query to the relationships
# index and the no_match_query (all documents of type "user") elsewhere; the
# terms facet then runs over both user_id and user_id_2.
combined_facet_query='{
"query": {
"indices" : {
"indices" : ["relationships"],
"query" : {
"filtered" : {
"query" : {
"term" : {
"relation" : "friend"
}
},
"filter" : {
"type" : {
"value" : "relation"
}
}
}
},
"no_match_query" : {
"filtered" : {
"query" : {
"match_all" : { }
},
"filter" : {
"type" : {
"value" : "user"
}
}
}
}
}
},
"facets" : {
"popular" : {
"terms" : {
"fields" : ["user_id", "user_id_2"]
}
}
}
}'
curl -XGET "${es}/relationships,users/_search?pretty=true&search_type=count" \
  -d "${combined_facet_query}"
另一种解决方案是在创建用户时,向关系索引中为该用户添加一条“自我”(self)关系。我更喜欢第二种解决方案,因为它看起来没那么复杂。