0

这是我的 arff 文件(links.arff):

@relation links

@attribute isLink1Present numeric
@attribute isLink2Present numeric
@attribute isLink3Present numeric
@attribute isLink4Present numeric
@attribute isLink5Present numeric
@attribute isLink6Present numeric
@attribute isLink7Present numeric
@attribute isLink8Present numeric
@attribute isLink9Present numeric

@data
0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,1,0,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,1,0,0,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,1,0,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,1,0,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,1,0,0,0,0
1,1,0,1,0,0,0,0,0
1,1,0,1,1,0,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,0,1,0,0,0,0
1,1,1,1,0,0,0,0,0
1,1,1,1,1,0,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0
1,0,0,0,1,0,0,0,0
1,0,0,0,1,1,0,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,0,1,0,0,0
1,0,0,1,1,0,0,0,0
1,0,0,1,1,1,0,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,0,1,0,0,0
1,0,1,0,1,0,0,0,0
1,0,1,0,1,1,0,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,0,1,0,0,0
1,0,1,1,1,0,0,0,0
1,0,1,1,1,1,0,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,0,1,0,0,0
1,1,0,0,1,0,0,0,0
1,1,0,0,1,1,0,0,0
1,1,0,1,0,0,0,0,0
1,1,0,1,0,1,0,0,0
1,1,0,1,1,0,0,0,0
1,1,0,1,1,1,0,0,0
1,1,1,0,0,0,0,0,0
1,1,1,0,0,1,0,0,0
1,1,1,0,1,0,0,0,0
1,1,1,0,1,1,0,0,0
1,1,1,1,0,0,0,0,0
1,1,1,1,0,1,0,0,0
1,1,1,1,1,0,0,0,0
1,1,1,1,1,1,0,0,0
1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0
1,0,0,0,0,1,0,0,0
1,0,0,0,0,1,1,0,0
1,0,0,0,1,0,0,0,0
1,0,0,0,1,0,1,0,0
1,0,0,0,1,1,0,0,0
1,0,0,0,1,1,1,0,0
1,0,0,1,0,0,0,0,0
1,0,0,1,0,0,1,0,0
1,0,0,1,0,1,0,0,0
1,0,0,1,0,1,1,0,0
1,0,0,1,1,0,0,0,0
1,0,0,1,1,0,1,0,0
1,0,0,1,1,1,0,0,0
1,0,0,1,1,1,1,0,0
1,0,1,0,0,0,0,0,0
1,0,1,0,0,0,1,0,0
1,0,1,0,0,1,0,0,0
1,0,1,0,0,1,1,0,0
1,0,1,0,1,0,0,0,0
1,0,1,0,1,0,1,0,0
1,0,1,0,1,1,0,0,0
1,0,1,0,1,1,1,0,0
1,0,1,1,0,0,0,0,0
1,0,1,1,0,0,1,0,0
1,0,1,1,0,1,0,0,0
1,0,1,1,0,1,1,0,0
1,0,1,1,1,0,0,0,0
1,0,1,1,1,0,1,0,0
1,0,1,1,1,1,0,0,0
1,0,1,1,1,1,1,0,0
1,1,0,0,0,0,0,0,0
1,1,0,0,0,0,1,0,0
1,1,0,0,0,1,0,0,0
1,1,0,0,0,1,1,0,0

这是我实现 k-means 的方式:

/**
 * Clusters the instances of {@code links.arff} with Weka's {@code SimpleKMeans}
 * (Euclidean distance, fixed seed 10) and prints the cluster assigned to each
 * instance.
 *
 * <p>The ARFF file is looked up in the directory named by the
 * {@code datafiles-home} property. Any failure while loading the data or
 * building the clusterer is reported via a stack trace and is not propagated
 * to the caller.
 *
 * @param numClusters the number of clusters (k) to build
 */
public void runKMeans(int numClusters){
    try {
        SimpleKMeans kmeans = new SimpleKMeans();

        DistanceFunction df = new weka.core.EuclideanDistance();
        kmeans.setDistanceFunction(df);
        kmeans.setSeed(10);

        // Instance order is preserved so that getAssignments() below can be
        // read positionally (one entry per instance, in input order).
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numClusters);

        // Use the platform separator instead of a hard-coded "\\" so the path
        // also resolves on non-Windows systems.
        String arffFile = new PropertyUtils().getProperty("datafiles-home")
                + java.io.File.separator + "links.arff";
        DataSource source = new DataSource(arffFile);
        Instances instances = source.getDataSet();

        kmeans.buildClusterer(instances);
        System.out.println(kmeans.displayStdDevsTipText());

        // assignments[k] is the 0-based cluster number of the k-th instance;
        // the array has as many elements as there are instances.
        int[] assignments = kmeans.getAssignments();

        List<Cluster> lc = new ArrayList<Cluster>(assignments.length);
        for (int idx = 0; idx < assignments.length; idx++) {
            // Instances are reported 1-based.
            lc.add(new Cluster(idx + 1, assignments[idx]));
        }
        // Ordering is whatever Cluster's natural ordering defines (declared elsewhere).
        Collections.sort(lc);

        for (Cluster c : lc) {
            PrintUtils.println("Instance : "+c.getInstance()+" Cluster "+c.getCluster());
        }
    }
    catch (Exception e) {
        // Best-effort: report the failure but do not propagate it.
        e.printStackTrace();
    }
}

我想将每一列数据与一个“名称”属性相关联,这样我就可以识别每一列。我怎样才能做到这一点?我不认为我可以向@data 添加 String 属性,因为这会影响 k-means 算法的实现?还有其他方法吗?

4

1 回答 1

0

是的,您可以添加一个附加属性来命名实例。

然后,对于 EuclideanDistance,您可以使用 -R 选项或 setAttributeIndices 来指定用于计算距离的属性范围。把 name 属性排除在该范围之外即可!

于 2013-07-26T09:19:19.547 回答