我们有一个应用程序,Python 模块会将数据写入 redis 分片,而 Java 模块将从 redis 分片读取数据,因此我需要为 Java 和 Python 实现完全相同的一致哈希算法,以确保可以找到数据。
我用谷歌搜索并尝试了几种实现,但发现 Java 和 Python 的实现总是不同的,不能一起使用。需要你的帮助。
编辑,我尝试过的在线实现:
Java:http
://weblogs.java.net/blog/tomwhite/archive/2007/11/consistent_hash.html
Python:http://techspot.zzzeek.org/2012/07/07 /the-absolutely-simplest-consistent-hashing-example/
http://amix.dk/blog/post/19367
编辑、附加 Java(使用 Google Guava lib)和我编写的 Python 代码。代码基于以上文章。
import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;
import com.google.common.hash.HashFunction;
public class ConsistentHash<T> {
private final HashFunction hashFunction;
private final int numberOfReplicas;
private final SortedMap<Long, T> circle = new TreeMap<Long, T>();
public ConsistentHash(HashFunction hashFunction, int numberOfReplicas,
Collection<T> nodes) {
this.hashFunction = hashFunction;
this.numberOfReplicas = numberOfReplicas;
for (T node : nodes) {
add(node);
}
}
public void add(T node) {
for (int i = 0; i < numberOfReplicas; i++) {
circle.put(hashFunction.hashString(node.toString() + i).asLong(),
node);
}
}
public void remove(T node) {
for (int i = 0; i < numberOfReplicas; i++) {
circle.remove(hashFunction.hashString(node.toString() + i).asLong());
}
}
public T get(Object key) {
if (circle.isEmpty()) {
return null;
}
long hash = hashFunction.hashString(key.toString()).asLong();
if (!circle.containsKey(hash)) {
SortedMap<Long, T> tailMap = circle.tailMap(hash);
hash = tailMap.isEmpty() ? circle.firstKey() : tailMap.firstKey();
}
return circle.get(hash);
}
}
测试代码:
ArrayList<String> al = new ArrayList<String>();
al.add("redis1");
al.add("redis2");
al.add("redis3");
al.add("redis4");
String[] userIds =
{"-84942321036308",
"-76029520310209",
"-68343931116147",
"-54921760962352"
};
HashFunction hf = Hashing.md5();
ConsistentHash<String> consistentHash = new ConsistentHash<String>(hf, 100, al);
for (String userId : userIds) {
System.out.println(consistentHash.get(userId));
}
Python代码:
import bisect
import md5
class ConsistentHashRing(object):
"""Implement a consistent hashing ring."""
def __init__(self, replicas=100):
"""Create a new ConsistentHashRing.
:param replicas: number of replicas.
"""
self.replicas = replicas
self._keys = []
self._nodes = {}
def _hash(self, key):
"""Given a string key, return a hash value."""
return long(md5.md5(key).hexdigest(), 16)
def _repl_iterator(self, nodename):
"""Given a node name, return an iterable of replica hashes."""
return (self._hash("%s%s" % (nodename, i))
for i in xrange(self.replicas))
def __setitem__(self, nodename, node):
"""Add a node, given its name.
The given nodename is hashed
among the number of replicas.
"""
for hash_ in self._repl_iterator(nodename):
if hash_ in self._nodes:
raise ValueError("Node name %r is "
"already present" % nodename)
self._nodes[hash_] = node
bisect.insort(self._keys, hash_)
def __delitem__(self, nodename):
"""Remove a node, given its name."""
for hash_ in self._repl_iterator(nodename):
# will raise KeyError for nonexistent node name
del self._nodes[hash_]
index = bisect.bisect_left(self._keys, hash_)
del self._keys[index]
def __getitem__(self, key):
"""Return a node, given a key.
The node replica with a hash value nearest
but not less than that of the given
name is returned. If the hash of the
given name is greater than the greatest
hash, returns the lowest hashed node.
"""
hash_ = self._hash(key)
start = bisect.bisect(self._keys, hash_)
if start == len(self._keys):
start = 0
return self._nodes[self._keys[start]]
测试代码:
import ConsistentHashRing
if __name__ == '__main__':
server_infos = ["redis1", "redis2", "redis3", "redis4"];
hash_ring = ConsistentHashRing()
test_keys = ["-84942321036308",
"-76029520310209",
"-68343931116147",
"-54921760962352",
"-53401599829545"
];
for server in server_infos:
hash_ring[server] = server
for key in test_keys:
print str(hash_ring[key])