哈希图和哈希码问题
有一个 POJO、重写的哈希码和 equals 来帮助一个特定的比较器(这里没有显示)
package coll.hset;
public class Dat {
private String name;
private String dat;
private int aa;//some business reason not used in hashcode and equals
public int hashCode(){
int h = 0 ;
if(name != null){
h += name.hashCode();
}
if(dat != null){
h += dat.hashCode();
}
return h;
}
public boolean equals(Object o){
if(o instanceof Dat){
Dat oo = (Dat)o;
if(this.name ==null && oo.name != null){
return false;
}else if(!name.equals(oo.name)){
return false;
}
if(this.dat ==null && oo.dat != null){
return false;
}else if(!dat.equals(oo.dat)){
return false;
}
return true;
}
return false;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getDat() {
return dat;
}
public void setDat(String dat) {
this.dat = dat;
}
public int getAa() {
return aa;
}
public void setAa(int aa) {
this.aa = aa;
}
}
用户应用程序:
package coll.hset;
import java.util.HashSet;
import java.util.Random;
public class App {
final static int SZ = 2 ^ 8;
/**
* @param args
*/
public static void main(String[] args) {
Random rndm = new Random();// to create random data
Dat dd;// reference while filling up set
Dat[] d2 = new Dat[500];// save a few here for later ops
int fills = 0;
HashSet<Dat> dats = new HashSet<Dat>();// set
for (int i = 0; i < 10000; i++) {
dd = new Dat();
dd.setAa(i);
// fill random dat and name.
char v = (char) (65 + rndm.nextInt(26));
dd.setDat("a " + v);
v = (char) (65 + rndm.nextInt(26));
char v1 = (char) (65 + rndm.nextInt(26));
char v2 = (char) (65 + rndm.nextInt(26));
char v3 = (char) (65 + rndm.nextInt(26));
char v4 = (char) (65 + rndm.nextInt(26));
dd.setName(v + " " + v1 + v2 + v3 + v1 + v + v4);
dats.add(dd);
if (i % 60 == 0) {
d2[fills++] = dd;
}
}
Dat ref = d2[0];
int hash = hash(ref.hashCode());
int idx = indexFor(hash, SZ);
boolean has1 = dats.contains(d2[0]);
System.out.println("has d 0 :" + has1 + ", name :" + ref.getName() + ", hash :" + ref.hashCode() + ". hash2 :" + hash + ", idx :" + idx + ", when size of table :" + SZ);
d2[0].setName(ref.getName() + "l");
// d2[0].setName(ref.getName() + "l");
d2[0].setName("Tony G");
// ref.setDat("sd=");
hash = hash(ref.hashCode());
// if you run this many times will see that for some cases the table is the same, so a quicker rehash, instead of remove and add back after change is what I'm after
idx = indexFor(hash, SZ);
has1 = dats.contains(d2[0]);
System.out.println("has d 0 after name change :" + has1 + ", name :" + ref.getName() + ".");
System.out.println("has d 0 :" + has1 + ", name :" + ref.getName() + ", hash :" + ref.hashCode() + ". hash2 :" + hash + ", idx :" + idx + ", when size of table :" + SZ);
System.out.println(" at : " + new java.util.Date());
}
static int hash(int h) {
// From Sun Java impl
/*
* / This function ensures that hashCodes that differ only by // constant multiples at each bit position have a bounded // number of collisions (approximately 8 at default load factor).
*/
h ^= (h >>> 20) ^ (h >>> 12);
return h ^ (h >>> 7) ^ (h >>> 4);
}
static int indexFor(int h, int length) {
return h & (length - 1);
}
}
正如预期的那样,第二次搜索表明 Dat 对象 d2[0] 不在集合中,即使它是。我知道如何修复它 - 一种方法 - 删除它,更改它然后将其添加回来。有没有其他方法可以告诉集合我们正在改变一个特定的对象?
可以看到 Oracle/Sun Java HashMap 如何自我重新散列。问题是 - 我们可以添加一个告诉集合的新方法 - 请重新散列这个对象,而不是删除并重新添加它,这样它更有效。
如果你多次运行上面的代码会发现在某些情况下表格是相同的(对于变异对象的之前和之后的哈希码),所以更快的重新哈希,而不是在更改后删除并添加回来是我所追求的,这利用了这一事实,并且仅在存储桶更改时才重新散列。