我的目标是使用减法聚类对我的数据进行聚类,以便进一步从中提取模糊规则。
假设我有以下二维数据:-
X[]=[ {0,.16,.24,.42,.48,.66,.83,.24,.42,.48,.66,.66,.16,.24,.42,.42,.48,.48,.48,.66,.66,.66,.66,.66,.66,.66,.83,.83,.83,.66},
{0,0,0,0,0,0,0,.15,.13,.1,.12,.18,.58,.78,.59,.78,.45,.49,.58,.45,.49,.58,.65,.71,.715,.72,.66,.725,.726,.455}
]
请为我提供有关如何在 java 中实现减法聚类的示例。
顺便说一句,我做了一些研究工作,并为此找到了以下算法
算法:-
- 使用两个维度中的最大值和最小值对数据进行归一化
- 通过使用计算电位
其中 m 是数据的维度或类型(在我的例子中为 2 ),n 是点数。
3 选择最高的潜在值作为第一个聚类中心,并修改所有数据点的潜力,直到
一些我是如何使用java实现它的,代码:-(请注意,由于某种原因,我无法提供完整的运行代码,所以我附上了执行所提出算法步骤的代码)
class SClustering {
double[][]data;
double normData[][];
ArrayList Potentials=new ArrayList();
ArrayList sortedPotentials;
ArrayList clusters=new ArrayList();
double rj[];
double radii; // radius
double squashFactor=1.5;
double acceptRatio=.5;
double rejectFactor=0.3;
double rb=radii*squashFactor;;
double alpha=4.0/(radii*radii);
double beta=4.0/(Math.pow(rb,2.0));
double max[];
double min[];
int numofdimen=2; // as according to the input dataset
int numofPoints=29; // as according to the input dataset
ArrayList centersArrayList=new ArrayList();
Potential p=new Potential();
double Pi=0;
boolean noCenter=false;
boolean flag=false;
public Clustering(double data[][], double Radii)
{
radii= Radii;
data=new double[2][29];
normData=new double[2][29];
max=new double[2];
min=new double[2];
rj=new double[2];
double[] sigmas=new double[centersArrayList.size()];
rj[0]=100;
rj[1]=50;
int index;
for(int i=0;i<29;i++)
{
for(int j=0;j<2;j++)
{
data[j][i]=data[j][i];
}
}
dataNormalize();
calculatePotential();
int m=0;
while(!flag)
{
sortPotentials();
index=setCenters(Potentials.size()-1);
sigmas=calculateSigmas();
if(index!=-1)
{
new cluster()
//setting the cluster
cluster.setCentroid(getCenterPoint(index));
cluster.setSigmas(sigmas);
RecalculatePotential(index);
}
else
{
flag=true;
}
}
}
public void dataNormalize()
{
//getting the max and min data point
for(int m=0;m<numofdimen;m++)
{
min[m]=data[m][0];
for(int i=0;i<numofPoints;i++)
{
if(min[m]>data[m][i])
{
min[m]=data[m][i];
}
}
}
for(int m=0;m<numofdimen;m++)
{
max[m]=data[m][0];
for(int i=0;i<numofPoints;i++)
{
if(max[m]<data[m][i])
{
max[m]=data[m][i];
}
}
}
//normalizing
for(int m=0;m<numofdimen;m++)
{
for(int i=0;i<numofPoints;i++)
{
normData[m][i]=(data[m][i]-min[m])/(max[m]-min[m]);
}
}
}
public void calculatePotential(){
double distance=0;
double tempPotential=0;
for(int k=0;k<numofPoints;k++)
{
for(int i=0;i<numofPoints;i++)
{
if(k!=i)
{
for(int m=0;m<numofdimen;m++)
{
distance+=normData[m][k]-normData[m][i];
}
tempPotential=(Math.exp(-1* alpha *Math.pow(distance,2)));
if(i!=0)
{
//here p is an object of potential class and here we are getting the previous set potentials
tempPotential+=previousPotentials.getValue();
}
}
}
p.setPotentials(k,tempPotential);
Potentials.add(p);
p=new Potential();
}
}
void RecalculatePotential(int index_of_center)
{
double distance=0;
double tempPotential;
for(int k=0;k<numofPoints;k++)
{
if(k!=index_of_center)
{
for(int m=0;m<numofdimen;m++)
{
distance+=normData[m][k]-normData[m][index_of_center];
}
tempPotential=(Math.exp(-1*beta*Math.pow(distance,2)));
tempPotential=((Potentials.get(k))-(((Potentials.get(index_of_center)))*tempPotential);
p =new Potential();
p.setPotentials(k,tempPotential);
Potentials.set(k,p);
p=new Potential();
}
}
}
boolean ifNewCenter(int index_of_center)
{
//if not new return false
//if new return true
}
double getMinDistance(int index_of_center)
{
double vectorDistances[]=new double[numofdimen];
double distances[]=new double[centersArrayList.size()];
double minDistanceistance;
for(int j=0;j<centersArrayList.size();j++)
{
for(int m=0;m<numofdimen;m++)
{
if(index_of_center!=j)
{
vectorDistances[m]=normData[m][index_of_center]-normData[m][((Integer)(centers.get(j))).intValue()];
}
}
distances[j]=calculateVLength(vectorDistances);
}
//sort the distances
return distances[0];
}
public void sortPotentials()
{
//returns the sorted list of potentials
}
public int setCenters(int maxIndex)
{
double minDistance;
double PotentialCenter;
PotentialCenter=((Double)(sortedPotentials.get(maxIndex))).doubleValue();
if(centersArrayList.size()!=0)
{
if(ifNewCenter()) // here we are checking the the center is new or not
{
minDistance=getMinDistance(maxIndex);
if(PotentialCenter>((acceptRatio)*((Potential)Potentials.get(Potentials.size()-1)).getValue()))
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
else if(clusteringEnd(maxIndex))
flag=true;
else if((minDistance/radii)+(PotentialCenter/Pi)<1)
{
p=new Potential();
p.setPotentials(maxIndex,0);
Potentials.set(maxIndex,p);
if(maxIndex>0)
{
setCenters(maxIndex-1);
}
else
{
noCenter=true;
return 0;
}
}
else
{
// System.out.println("flag is true nwo------------------------------------");
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
}
}
else
{
if(maxIndex>0)
{
setCenters(maxIndex-1);
}
else
{
noCenter=true;
return 0;
}
}
}
else
{
centersArrayList.add(((Integer)((Potential)(sortedPotentials.get(maxIndex))).getIndex()).intValue());
Pi=PotentialCenter;
}
if(!noCenter || !flag)
{
return ((Integer)(centersArrayList.get(centersArrayList.size()-1))).intValue();
}
else
{
return -2;
}
}
public boolean clusteringEnd(int centerindex)
{
//comparing the current potential with the rejectFactor* first largest potential
if((((Potential)(Potentials.get(centerindex))))<(rejectFactor*(((Potential)(Potentials.get(Potentials.size()-1))))))
return true;
return false;
}
public double[] calculateSigmas()
{
double sigmas[]=new double[numofdimen];
for(int m=0;m<numofdimen;m++)
{
sigmas[m]=(rj[m]*(max[m]-min[m]))/(Math.sqrt(8.0));
}
return sigmas;
}
public double calculateVLength(double input[]){
double temp=0;
double length=0;
for(int i=0;i<input.length;i++)
{
temp+=Math.pow(input[i],2);
}
length=Math.sqrt(temp);
return length;
}
public static void main(String[] args) {
double Points[][]={ {0,.16,.24,.42,.48,.66,.83,.24,.42,.48,.66,.66,.16,.24,.42,.42,.48,.48,.48,.66,.66,.66,.66,.66,.66,.66,.83,.83,.83,.66},
{0,0,0,0,0,0,0,.15,.13,.1,.12,.18,.58,.78,.59,.78,.45,.49,.58,.45,.49,.58,.65,.71,.715,.72,.66,.725,.726,.455}
};
SClustering sc;
sc=new SClustering(Points,.4);
}
}
但我在代码中的问题是:-
当我运行我的程序时,我只得到了两个带有 centroid1 的集群:0.83,0.725 centroid2:- 0.83,0.726
但是当我在上述相同的数据集上执行 Matlab 'clusterfind' 程序时,我得到了 3 个集群
质心1:0.66,0.65 质心2:- 0.48,0.10 质心3:- 0.16,0.0
下图中显示的各种参数值在我的实现中也是相同的
那么我正在实施的算法有什么问题,请给我指导