我正在尝试实现一个能够检测钓鱼网站的系统。系统的输入是通过网页获取的网站 URL。我使用的是 UCI 钓鱼网站数据集(约有 30 个属性),并用 Weka 训练和测试 J48 分类模型,以便对新的实例进行预测。
属性
@relation phishing
@attribute having_IP_Address { -1,1 }
@attribute URL_Length { 1,0,-1 }
@attribute Shortining_Service { 1,-1 }
@attribute having_At_Symbol { 1,-1 }
@attribute double_slash_redirecting { -1,1 }
@attribute Prefix_Suffix { -1,1 }
@attribute having_Sub_Domain { -1,0,1 }
@attribute SSLfinal_State { -1,1,0 }
@attribute Domain_registeration_length { -1,1 }
@attribute Favicon { 1,-1 }
@attribute port { 1,-1 }
@attribute HTTPS_token { -1,1 }
@attribute Request_URL { 1,-1 }
@attribute URL_of_Anchor { -1,0,1 }
@attribute Links_in_tags { 1,-1,0 }
@attribute SFH { -1,1,0 }
@attribute Submitting_to_email { -1,1 }
@attribute Abnormal_URL { -1,1 }
@attribute Redirect { 0,1 }
@attribute on_mouseover { 1,-1 }
@attribute RightClick { 1,-1 }
@attribute popUpWidnow { 1,-1 }
@attribute Iframe { 1,-1 }
@attribute age_of_domain { -1,1 }
@attribute DNSRecord { -1,1 }
@attribute web_traffic { -1,0,1 }
@attribute Page_Rank { -1,1 }
@attribute Google_Index { 1,-1 }
@attribute Links_pointing_to_page { 1,0,-1 }
@attribute Statistical_report { -1,1 }
@attribute Result { -1,1 }
代码
package weka1;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
/**
 * Trains a Weka J48 decision tree on the phishing training set, prints
 * 10-fold cross-validation statistics computed on that training set, and
 * writes a copy of the test set whose class values are replaced by the
 * tree's predictions.
 */
public class file1 {

    /** ARFF file used to build and cross-validate the classifier. */
    private static final String TRAIN_PATH =
            "C:\\Users\\.Gayatjhri\\Desktop\\phishing_training.arff";

    /** ARFF file whose instances are labeled by the trained classifier. */
    private static final String TEST_PATH =
            "C:\\Users\\.Gayatjhri\\Desktop\\phishing_test.arff";

    /** Destination ARFF file for the test instances with predicted labels. */
    private static final String LABELED_PATH =
            "C:\\Users\\.Gayatjhri\\Desktop\\phishing_labeledd.arff";

    /** Number of folds used for cross-validation. */
    private static final int FOLDS = 10;

    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if a data file cannot be read or written, or if Weka
     *                   fails while training, evaluating or classifying
     */
    public static void main(String[] args) throws Exception {
        Instances train1 = loadDataset(TRAIN_PATH);
        Instances test = loadDataset(TEST_PATH);

        J48 tree = new J48();
        tree.buildClassifier(train1);

        // Cross-validation statistics are computed on the training data only;
        // the test file is used solely for producing predictions below.
        Evaluation eval = new Evaluation(train1);
        eval.crossValidateModel(tree, train1, FOLDS, new Random(1));
        System.out.println(eval.toSummaryString(
                "Results\n==========================================\n", true));
        // Metrics for the class value at index 1 of the class attribute.
        // Separated by spaces so the three numbers do not run together.
        System.out.println(
                eval.fMeasure(1) + " " + eval.precision(1) + " " + eval.recall(1));

        // Work on a copy so the loaded test set keeps its original class values.
        Instances labeled = new Instances(test);
        for (int i = 0; i < test.numInstances(); i++) {
            double clsLabel = tree.classifyInstance(test.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // try-with-resources closes (and flushes) the writer even if write() fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(LABELED_PATH))) {
            writer.write(labeled.toString());
        }
    }

    /**
     * Reads an ARFF file and marks its last attribute as the class attribute.
     *
     * <p>For the 31-attribute phishing header (30 features plus {@code Result})
     * the last attribute is index 30.
     *
     * @param path location of the ARFF file
     * @return the loaded dataset with its class index set
     * @throws java.io.IOException if the file cannot be opened or parsed
     */
    private static Instances loadDataset(String path) throws java.io.IOException {
        // The reader is closed on every path, including a parse failure.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            Instances data = new Instances(reader);
            data.setClassIndex(data.numAttributes() - 1);
            return data;
        }
    }
}
对于任意给定的 URL,如何为这 30 个属性赋值?