这是我的代码:
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.HierarchicalClusterer;
import weka.clusterers.EM;
import weka.core.converters.CSVLoader;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.neighboursearch.PerformanceStats;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Enumeration;
import weka.core.*;
public class WEKASample1 {
public static void main(String[] args) {
Instances data = null;
CSVLoader csvLoader = new CSVLoader();
try {
csvLoader.setSource(new File("D:\\WEKA\\numbers.csv"));
data = csvLoader.getDataSet();
HierarchicalClusterer h = new HierarchicalClusterer();
DistanceFunction d = new DistanceFunction() {
@Override
public void setOptions(String[] arg0) throws Exception {
}
@Override
public Enumeration listOptions() {
return null;
}
@Override
public String[] getOptions() {
return null;
}
@Override
public void update(Instance arg0) {
}
@Override
public void setInvertSelection(boolean arg0) {
}
@Override
public void setInstances(Instances arg0) {
}
@Override
public void setAttributeIndices(String arg0) {
}
@Override
public void postProcessDistances(double[] arg0) {
}
@Override
public boolean getInvertSelection() {
return false;
}
@Override
public Instances getInstances() {
return null;
}
@Override
public String getAttributeIndices() {
return null;
}
@Override
public double distance(Instance arg0, Instance arg1, double arg2,
PerformanceStats arg3) {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1, double arg2) {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1, PerformanceStats arg2)
throws Exception {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1) {
double s1 = arg0.value(0);
double s2 = arg1.value(0);
return Double.POSITIVE_INFINITY;
}
};
h.setDistanceFunction(d);
SelectedTag s = new SelectedTag(1, HierarchicalClusterer.TAGS_LINK_TYPE);
h.setLinkType(s);
h.buildClusterer(data);
// double[] arr;
// for(int i=0; i<data.size(); i++) {
//
// arr = h.distributionForInstance(data.get(i));
// for(int j=0; j< arr.length; j++)
// System.out.print(arr[j]+",");
// System.out.println();
//
// }
System.out.println(h.numberOfClusters());
} catch (Exception e) {
e.printStackTrace();
}
}
}
现在,即使我也修改了 distancefucntion 方法,生成的簇数的输出始终为 2。如果是哪个集群,我怎么知道哪个实例?当我取消注释上面为获取实例的分布而编写的代码时,我得到一个 ArrayOutOfBound 异常。
但总的来说,谁能解释一下 WEKA 在这里是如何分层完成聚类的?
这是我的数据集,长度为 10,维度为 2:
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10