0

嗨,我正在为一个扫描邮件以确定它们是否是垃圾邮件的应用程序开发一个电子邮件过滤器,这是我的课程:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;

import ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer;

public class MotsClesFilter implements EmailFilter {

    final String NAME = "Filtrage par mots cles";
    private Pattern chaineSpam;
    private Matcher chaineCourriel;
    private int nbOccMotSpam =0;
    private byte confidenceLevel;
    @Override
    public String getFilterName() {
        return this.NAME;

    }

    @Override
    public byte checkSpam(MimeMessage message) {
        analyze(message);
        switch(this.nbOccMotSpam){
        case 0:
            this.confidenceLevel = 1;
            break;
        case 1:
            this.confidenceLevel = CANT_SAY;
            break;
        case 2:
            this.confidenceLevel= 50;
            break;
        case 3:
            this.confidenceLevel = 70;
            break;
        case 4 :
            this.confidenceLevel = 80;
            break;



        } return (getConfidenceLevel());
    }


    public void analyze(MimeMessage message){
        try {
            List<String> listeChaines = new ArrayList<String>(); 
            BufferedReader bis = new BufferedReader(new InputStreamReader(new FileInputStream(new File("SpamWords.txt"))));
            while(bis.ready()){
                String ligne = bis.readLine();
                listeChaines.add(ligne);
            }
            String[] tabMots = EmailSplicer.getMessageContent(message);
            for (int i =0;i<tabMots.length;i++){
                /*System.out.print("*************************************");
                System.out.print(tabMots[0]);
                System.out.print("**************************************");*/
                for (int j =0; j<listeChaines.size();j++){
                    this.chaineSpam = Pattern.compile(listeChaines.get(j));
                    this.chaineCourriel = this.chaineSpam.matcher(tabMots[i]);
                    if (this.chaineCourriel.matches())
                        this.nbOccMotSpam++;

                }
            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (MessagingException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    @Override
    public byte getConfidenceLevel() {
        // TODO Auto-generated method stub
        return this.confidenceLevel;
    }

    @Override
    public boolean enabled() {
        // TODO Auto-generated method stub
        return true;
    }
}

这是我正在使用的 EmailSplicer 实用程序类:

import java.io.IOException;
import java.util.ArrayList;

import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.internet.MimeMessage;

/**
 * Utility class to return all the content of a MimeMessage
 * @author Maxime Caumartin <maxime.caumartin.1@ens.etsmtl.ca>
 */
public class EmailSplicer {

    /**
     * Contains the types of email parts that can be analyzed by this class.
     * @author Maxime Caumartin <maxime.caumartin.1@ens.etsmtl.ca>
     */
    private enum ContentTypes
    {
        Plain("text/plain"), HTML("text/html"), Multipart("multipart"), Unknown(
                "?");

        private String  type;

        ContentTypes(String type)
        {
            this.type = type;
        }

        public static ContentTypes getType(String type)
        {
            if (type.contains(Plain.type))
                return Plain;
            if (type.contains(HTML.type))
                return HTML;
            if (type.contains(Multipart.type))
                return Multipart;
            return Unknown;
        }

    }

    /**
     * Recursive method that passes through all the parts of the Mutlipart message and returns an ArrayList<String> of the content of these parts.
     * @param multiPartMsg The Multipart that needs to be dissected.
     * @return The ArrayList<String> containing all the content of the Mutlipart message.
     * @throws MessagingException Exception thrown if the analyzer cannot read the message.
     * @throws IOException Exception thrown if the encoding type isn't valid.
     */
    private static ArrayList<String> getMutlipartContent(Multipart multiPartMsg)
            throws MessagingException, IOException
    {
        ArrayList<String> returnTable = new ArrayList<String>(
                multiPartMsg.getCount());

        for (int i = 0; i < multiPartMsg.getCount(); i++)
        {
            switch (ContentTypes.getType(multiPartMsg.getBodyPart(i)
                    .getContentType()))
            {
                case Plain:
                    returnTable.add((String) multiPartMsg.getBodyPart(i)
                            .getContent());
                    break;
                case HTML:
                    String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) multiPartMsg.getBodyPart(i)
                            .getContent()).trim();
                    if (s.length() != 0)
                        returnTable.add(s);
                    break;
                case Multipart:
                    returnTable
                            .addAll(getMutlipartContent((Multipart) multiPartMsg
                                    .getBodyPart(i).getContent()));
                    break;
                default:
            }
        }
        return returnTable;
    }

    /**
     * Returns all the content of the MimeMessage passed as a parameter. The whole content will be parsed.
     * @param message The MimeMessage containing textual information.
     * @return The array of string containing all the strings from the content of the message.
     * @throws MessagingException Exception thrown if the analyzer cannot read the message.
     * @throws IOException Exception thrown if the encoding type isn't valid.
     */
    public static String[] getMessageContent(MimeMessage message)
            throws MessagingException, IOException
    {
        String contentType = message.getContentType();

        switch (ContentTypes.getType(contentType))
        {
            case Plain:
                return new String[] { (String) message.getContent() };
            case Multipart:
                return getMutlipartContent(
                        (Multipart) message.getContent()).toArray(new String[0]);
            case HTML:
                String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) message
                        .getContent()).trim();
                if (s.length() != 0)
                    return new String[] {s};
            default:
                return new String[0];
        }

    }

}

现在,当我执行整个应用程序的主要方法时,这是我得到的异常:

java.io.UnsupportedEncodingException: iso-0621-9
at sun.nio.cs.StreamDecoder.forInputStreamReader(Unknown Source)
at java.io.InputStreamReader.<init>(Unknown Source)
at com.sun.mail.handlers.text_plain.getContent(text_plain.java:82)
at javax.activation.DataSourceDataContentHandler.getContent(Unknown Source)
at javax.activation.DataHandler.getContent(Unknown Source)
at javax.mail.internet.MimeBodyPart.getContent(MimeBodyPart.java:629)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMutlipartContent(EmailSplicer.java:69)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMessageContent(EmailSplicer.java:101)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.analyze(MotsClesFilter.java:66)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.checkSpam(MotsClesFilter.java:34)
at ca.etsmtl.logti.log619.lab05.Application.main(Application.java:107)

有人可以告诉我如何解决它吗?

4

2 回答 2

2

ISO-0621-9 不是一种编码,如果是,它也不是Java 支持的编码。我猜这甚至可能是垃圾邮件的一个很好的指标:没有有效的编码 => 垃圾邮件。

稍微搜索一下ISO 621,发现ISO-621“锰矿石——金属铁含量的测定(金属铁含量不超过 2%)——磺基水杨酸光度法”的国际标准

我想说这与计算机无关,与编码无关;)

于 2013-04-08T19:20:43.827 回答
2

本文档列出了 Java 支持的编码。

iso-0621-9 不在列表中。

于 2013-04-08T19:14:34.947 回答