嗨,我正在为一个扫描邮件以判断其是否为垃圾邮件的应用程序开发一个电子邮件过滤器,这是我的类:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;
import ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer;
/**
 * Keyword-based spam filter.
 *
 * <p>Each call to {@link #checkSpam(MimeMessage)} loads the regular expressions listed in
 * {@code SpamWords.txt} (one per line), counts how many (content part, pattern) pairs of the
 * message contain a match, and maps that count to a confidence level.
 *
 * <p>Instances keep the result of the last analysis in fields, so a single instance must not be
 * used from several threads at once.
 */
public class MotsClesFilter implements EmailFilter {

    /** Display name returned by {@link #getFilterName()}. */
    final String NAME = "Filtrage par mots cles";

    /** Relative path of the file that holds one spam pattern (regular expression) per line. */
    private static final String SPAM_WORDS_FILE = "SpamWords.txt";

    /** Number of spam pattern hits found by the most recent {@link #analyze(MimeMessage)} call. */
    private int nbOccMotSpam = 0;

    /** Confidence level computed by the most recent {@link #checkSpam(MimeMessage)} call. */
    private byte confidenceLevel;

    @Override
    public String getFilterName() {
        return this.NAME;
    }

    /**
     * Analyzes the message and converts the number of spam pattern hits into a confidence level.
     *
     * @param message the message to inspect
     * @return the confidence level, also available afterwards through {@link #getConfidenceLevel()}
     */
    @Override
    public byte checkSpam(MimeMessage message) {
        analyze(message);
        switch (this.nbOccMotSpam) {
            case 0:
                this.confidenceLevel = 1;
                break;
            case 1:
                // CANT_SAY is not declared in this class; presumably inherited from EmailFilter.
                this.confidenceLevel = CANT_SAY;
                break;
            case 2:
                this.confidenceLevel = 50;
                break;
            case 3:
                this.confidenceLevel = 70;
                break;
            default:
                // Four hits or more. Without this branch a message with more than four hits
                // would keep whatever level the previous message produced.
                this.confidenceLevel = 80;
                break;
        }
        return getConfidenceLevel();
    }

    /**
     * Counts the spam pattern hits in the message and stores the result for
     * {@link #checkSpam(MimeMessage)}.
     *
     * <p>The counter is reset on every call so results never leak from one message to the next.
     * A pattern is counted once per content part in which it occurs anywhere; the parts returned
     * by {@code EmailSplicer.getMessageContent} are whole bodies, not single words, so a
     * whole-string match would practically never succeed.
     *
     * <p>I/O and messaging failures are reported on standard error and leave the counter at the
     * value reached so far.
     *
     * @param message the message to inspect
     */
    public void analyze(MimeMessage message) {
        this.nbOccMotSpam = 0;
        try {
            List<Pattern> spamPatterns = loadSpamPatterns();
            String[] parts = EmailSplicer.getMessageContent(message);
            for (String part : parts) {
                if (part == null) {
                    continue;
                }
                for (Pattern spamPattern : spamPatterns) {
                    if (spamPattern.matcher(part).find()) {
                        this.nbOccMotSpam++;
                    }
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException when the spam word file is missing.
            e.printStackTrace();
        } catch (MessagingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the spam word file and compiles every non-blank line as a regular expression.
     *
     * @return the compiled patterns, in file order
     * @throws IOException if the file is missing or cannot be read
     */
    private List<Pattern> loadSpamPatterns() throws IOException {
        List<Pattern> patterns = new ArrayList<Pattern>();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(SPAM_WORDS_FILE))));
        try {
            String line;
            // readLine() returns null at end of file; ready() only says whether a read would
            // block and is not an end-of-file test.
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue; // a blank line is not a spam word
                }
                patterns.add(Pattern.compile(line));
            }
        } finally {
            reader.close();
        }
        return patterns;
    }

    @Override
    public byte getConfidenceLevel() {
        return this.confidenceLevel;
    }

    @Override
    public boolean enabled() {
        return true;
    }
}
这是我正在使用的 EmailSplicer 实用程序类:
import java.io.IOException;
import java.util.ArrayList;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.internet.MimeMessage;
/**
* Utility class to return all the content of a MimeMessage
* @author Maxime Caumartin <maxime.caumartin.1@ens.etsmtl.ca>
*/
public class EmailSplicer {
/**
* Contains the types of email parts that can be analyzed by this class.
* @author Maxime Caumartin <maxime.caumartin.1@ens.etsmtl.ca>
*/
private enum ContentTypes
{
Plain("text/plain"), HTML("text/html"), Multipart("multipart"), Unknown(
"?");
private String type;
ContentTypes(String type)
{
this.type = type;
}
public static ContentTypes getType(String type)
{
if (type.contains(Plain.type))
return Plain;
if (type.contains(HTML.type))
return HTML;
if (type.contains(Multipart.type))
return Multipart;
return Unknown;
}
}
/**
* Recursive method that passes through all the parts of the Mutlipart message and returns an ArrayList<String> of the content of these parts.
* @param multiPartMsg The Multipart that needs to be dissected.
* @return The ArrayList<String> containing all the content of the Mutlipart message.
* @throws MessagingException Exception thrown if the analyzer cannot read the message.
* @throws IOException Exception thrown if the encoding type isn't valid.
*/
private static ArrayList<String> getMutlipartContent(Multipart multiPartMsg)
throws MessagingException, IOException
{
ArrayList<String> returnTable = new ArrayList<String>(
multiPartMsg.getCount());
for (int i = 0; i < multiPartMsg.getCount(); i++)
{
switch (ContentTypes.getType(multiPartMsg.getBodyPart(i)
.getContentType()))
{
case Plain:
returnTable.add((String) multiPartMsg.getBodyPart(i)
.getContent());
break;
case HTML:
String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) multiPartMsg.getBodyPart(i)
.getContent()).trim();
if (s.length() != 0)
returnTable.add(s);
break;
case Multipart:
returnTable
.addAll(getMutlipartContent((Multipart) multiPartMsg
.getBodyPart(i).getContent()));
break;
default:
}
}
return returnTable;
}
/**
* Returns all the content of the MimeMessage passed as a parameter. The whole content will be parsed.
* @param message The MimeMessage containing textual information.
* @return The array of string containing all the strings from the content of the message.
* @throws MessagingException Exception thrown if the analyzer cannot read the message.
* @throws IOException Exception thrown if the encoding type isn't valid.
*/
public static String[] getMessageContent(MimeMessage message)
throws MessagingException, IOException
{
String contentType = message.getContentType();
switch (ContentTypes.getType(contentType))
{
case Plain:
return new String[] { (String) message.getContent() };
case Multipart:
return getMutlipartContent(
(Multipart) message.getContent()).toArray(new String[0]);
case HTML:
String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) message
.getContent()).trim();
if (s.length() != 0)
return new String[] {s};
default:
return new String[0];
}
}
}
现在,当我运行整个应用程序的 main 方法时,得到了如下异常:
java.io.UnsupportedEncodingException: iso-0621-9
at sun.nio.cs.StreamDecoder.forInputStreamReader(Unknown Source)
at java.io.InputStreamReader.<init>(Unknown Source)
at com.sun.mail.handlers.text_plain.getContent(text_plain.java:82)
at javax.activation.DataSourceDataContentHandler.getContent(Unknown Source)
at javax.activation.DataHandler.getContent(Unknown Source)
at javax.mail.internet.MimeBodyPart.getContent(MimeBodyPart.java:629)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMutlipartContent(EmailSplicer.java:69)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMessageContent(EmailSplicer.java:101)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.analyze(MotsClesFilter.java:66)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.checkSpam(MotsClesFilter.java:34)
at ca.etsmtl.logti.log619.lab05.Application.main(Application.java:107)
有人可以告诉我如何解决它吗?