I am trying to convert my Apriori algorithm into an Eclat algorithm. My Apriori works on transactions stored in horizontal format (one transaction per row, with the items as columns) and returns the n-th frequent itemsets.
The Eclat version I need should lay the items out vertically and walk through the transactions horizontally. Like my Apriori, it needs to return the intersections of the itemsets.
Items (columns), one transaction per row:

0 1 1 0 1 1 1 0 0 1 1 1 1 1 1
0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
0 1 1 0 0 1 1 0 0 1 0 1 1 1 1
0 0 1 0 1 1 1 0 0 1 1 1 1 1 1
0 1 1 0 1 1 0 1 0 1 1 1 1 1 0
0 1 1 1 1 1 0 0 0 1 1 1 1 1 1
0 0 1 1 1 1 0 0 0 1 0 1 1 1 1
1 1 1 1 1 1 0 0 0 1 1 1 1 0 1
0 1 1 1 1 1 0 0 0 1 0 1 1 0 1
1 1 1 1 1 0 0 0 0 1 1 1 1 0 0
Transposing the table is not the problem; finding the frequent itemsets by searching through it is.
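To make the vertical format concrete, this is roughly what I have in mind (just a rough sketch on the first two items of the table above; the names EclatSketch, matrix and minSup are made up, not from my program): each item maps to the set of transaction IDs that contain it, and the support of an itemset is simply the size of the intersection of its members' TID sets.

import java.util.*;

public class EclatSketch
{
    public static void main(String[] args)
    {
        //first two items of the table above, transposed: one row per item, one column per transaction
        int[][] matrix = {
            {0,0,0,0,0,0,0,1,0,1},   //item 1 occurs in transactions 8 and 10
            {1,1,1,0,1,1,0,1,1,1}    //item 2 (remaining items omitted)
        };
        int numTransactions = matrix[0].length;
        double minSup = 0.2; //example threshold

        //vertical layout: item -> set of transaction IDs (TID set)
        Map<Integer, Set<Integer>> tidSets = new HashMap<Integer, Set<Integer>>();
        for(int item = 0; item < matrix.length; item++)
        {
            Set<Integer> tids = new TreeSet<Integer>();
            for(int t = 0; t < numTransactions; t++)
                if(matrix[item][t] == 1)
                    tids.add(t + 1); //1-based transaction IDs
            tidSets.put(item + 1, tids);
        }

        //support of the itemset {1 2} = size of the intersection of the two TID sets
        Set<Integer> intersection = new TreeSet<Integer>(tidSets.get(1));
        intersection.retainAll(tidSets.get(2));
        double support = intersection.size() / (double) numTransactions;
        System.out.println("{1 2} -> TIDs " + intersection + ", support " + support
                + (support >= minSup ? " (frequent)" : " (not frequent)"));
    }
}

For reference, my current Apriori implementation follows.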
private void generateCandidates(int n)
{
    Vector<String> tempCandidates = new Vector<String>(); //temporary candidate string vector
    String str1, str2; //strings that will be used for comparisons
    StringTokenizer st1, st2; //string tokenizers for the two itemsets being compared

    //if it's the first set, candidates are just the item numbers
    if(n==1)
    {
        for(int i=1; i<=numItems; i++)
        {
            tempCandidates.add(Integer.toString(i));
        }
    }
    else if(n==2) //second itemset is just all combinations of itemset 1
    {
        //add each itemset from the previous frequent itemsets together
        for(int i=0; i<candidates.size(); i++)
        {
            st1 = new StringTokenizer(candidates.get(i));
            str1 = st1.nextToken();
            for(int j=i+1; j<candidates.size(); j++)
            {
                st2 = new StringTokenizer(candidates.elementAt(j));
                str2 = st2.nextToken();
                tempCandidates.add(str1 + " " + str2);
            }
        }
    }
    else
    {
        //for each itemset
        for(int i=0; i<candidates.size(); i++)
        {
            //compare to the next itemset
            for(int j=i+1; j<candidates.size(); j++)
            {
                //create the strings
                str1 = new String();
                str2 = new String();
                //create the tokenizers
                st1 = new StringTokenizer(candidates.get(i));
                st2 = new StringTokenizer(candidates.get(j));

                //make a string of the first n-2 tokens of the strings
                for(int s=0; s<n-2; s++)
                {
                    str1 = str1 + " " + st1.nextToken();
                    str2 = str2 + " " + st2.nextToken();
                }

                //if they have the same n-2 tokens, add them together
                if(str2.compareToIgnoreCase(str1)==0)
                    tempCandidates.add((str1 + " " + st1.nextToken() + " " + st2.nextToken()).trim());
            }
        }
    }
    //clear the old candidates
    candidates.clear();
    //set the new ones
    candidates = new Vector<String>(tempCandidates);
    tempCandidates.clear();
}
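
// A possible direction for the Eclat version (just a sketch, not part of my working code):
// if each candidate string is paired with the set of transaction IDs that contain it,
// the prefix join above can intersect the two parents' TID sets at the same time, so the
// support of the new candidate is known without another pass over the file. The map name
// "tidSets" and this helper are hypothetical, and java.util.Map/Set/TreeSet would need to
// be imported.
private String joinWithTidSets(String itemsetA, String itemsetB,
                               Map<String, Set<Integer>> tidSets)
{
    //intersect the TID sets of the two parents; the size of the result is the support count
    Set<Integer> intersection = new TreeSet<Integer>(tidSets.get(itemsetA));
    intersection.retainAll(tidSets.get(itemsetB));

    //append the last item of itemsetB to itemsetA, exactly like the string join above
    String joined = (itemsetA + " " + itemsetB.substring(itemsetB.lastIndexOf(' ') + 1)).trim();
    tidSets.put(joined, intersection); //remember the new candidate's TID set for the next level
    return joined;
}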
/************************************************************************
 * Method Name : calculateFrequentItemsets
 * Purpose     : Determine which candidates are frequent in the n-th itemsets
 *             : from all possible candidates
 * Parameters  : n - integer representing the current itemsets being evaluated
 * Return      : None
 *************************************************************************/
private void calculateFrequentItemsets(int n)
{
    Vector<String> frequentCandidates = new Vector<String>(); //the frequent candidates for the current itemset
    FileInputStream file_in; //file input stream
    BufferedReader data_in; //data input stream
    FileWriter fw;
    BufferedWriter file_out;
    StringTokenizer st, stFile; //tokenizers for candidate and transaction
    boolean match; //whether the transaction has all the items in an itemset
    boolean trans[] = new boolean[numItems]; //array to hold a transaction so that it can be checked
    int count[] = new int[candidates.size()]; //the number of successful matches

    try
    {
        //output file
        fw = new FileWriter(outputFile, true);
        file_out = new BufferedWriter(fw);
        //load the transaction file
        file_in = new FileInputStream(transaFile);
        data_in = new BufferedReader(new InputStreamReader(file_in));

        //for each transaction
        for(int i=0; i<numTransactions; i++)
        {
            //System.out.println("Got here " + i + " times"); //useful to debug files when unsure of the number of lines
            stFile = new StringTokenizer(data_in.readLine(), itemSep); //read a line from the file into the tokenizer
            //put the contents of that line into the transaction array
            for(int j=0; j<numItems; j++)
            {
                trans[j] = (stFile.nextToken().compareToIgnoreCase(oneVal[j])==0); //true if the token equals this item's "present" value
            }
            //check each candidate
            for(int c=0; c<candidates.size(); c++)
            {
                match = false; //reset match to false
                //tokenize the candidate so that we know which items need to be present for a match
                st = new StringTokenizer(candidates.get(c));
                //check each item in the itemset to see if it is present in the transaction
                while(st.hasMoreTokens())
                {
                    match = (trans[Integer.valueOf(st.nextToken())-1]);
                    if(!match) //if it is not present in the transaction, stop checking
                        break;
                }
                if(match) //if at this point it is a match, increase the count
                    count[c]++;
            }
        }

        for(int i=0; i<candidates.size(); i++)
        {
            //System.out.println("Candidate: " + candidates.get(i) + " with count: " + count[i] + " % is: " + (count[i]/(double)numTransactions));
            //if the support is at least minSup, add the candidate to the frequent candidates
            if((count[i]/(double)numTransactions)>=minSup)
            {
                frequentCandidates.add(candidates.get(i));
                //put the frequent itemset into the output file
                file_out.write(candidates.get(i) + "," + count[i]/(double)numTransactions + "\n");
            }
        }
        file_out.write("-\n");
        file_out.close();
        data_in.close(); //also close the transaction file
    }
    //if there is any error in this process, catch it and print the error message
    catch(IOException e)
    {
        System.out.println(e);
    }
    //clear old candidates
    candidates.clear();
    //new candidates are the old frequent candidates
    candidates = new Vector<String>(frequentCandidates);
    frequentCandidates.clear();
}
}
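
With a vertical layout like that, I expect calculateFrequentItemsets would no longer need to reread the transaction file on every pass: the support of a candidate is just the size of its TID set, so the frequency test should reduce to something like the sketch below. This is only my assumption of how it would look; tidSets is the hypothetical candidate-to-TID-set map from the join sketch above, and it would need java.util imports.

private Vector<String> keepFrequent(Map<String, Set<Integer>> tidSets,
                                    double minSup, int numTransactions)
{
    Vector<String> frequent = new Vector<String>();
    for(Map.Entry<String, Set<Integer>> entry : tidSets.entrySet())
    {
        double support = entry.getValue().size() / (double) numTransactions; //TID set size = count
        if(support >= minSup) //same threshold test as in calculateFrequentItemsets
            frequent.add(entry.getKey());
    }
    return frequent; //these become the candidates for the next level
}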