0

我们正在使用下面的代码来验证我们的电子邮件地址,但是当我们输入一个长电子邮件地址进行测试时,CPU 使用率达到 100% 并且程序继续运行,您能找出问题所在吗?

import java.io.Serializable;
import java.util.regex.Pattern;


public class EmailAddress1 implements Serializable {


private static final boolean ALLOW_DOMAIN_LITERALS = true;


private static final boolean ALLOW_QUOTED_IDENTIFIERS = true;


private static final String wsp = "[ \\t]"; //space or tab
private static final String fwsp = wsp + "*";

private static final String dquote = "\\\"";

private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";

private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";

private static final String quotedPair = "(\\\\" + asciiText + ")";


private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?   \\^\\_\\`\\{\\|\\}\\~]";
private static final String atom = fwsp + atext + "+" + fwsp;
private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;

private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
private static final String qcontent = "(" + qtext + "|" + quotedPair + ")";
private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;

private static final String word = "((" + atom + ")|(" + quotedString + "))";
private static final String phrase = word + "+"; //one or more words.

private static final String letter = "[a-zA-Z]";
private static final String letDig = "[a-zA-Z0-9]";
private static final String letDigHyp = "[a-zA-Z0-9-]";
private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";

private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
private static final String dcontent = dtext + "|" + quotedPair;
private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";

private static final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;

private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
private static final String addrSpec = localPart + "@" + domain;
private static final String angleAddr = "<" + addrSpec + ">";
private static final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
private static final String mailbox = nameAddr + "|" + addrSpec;

private static final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;
public static final Pattern VALID_PATTERN = Pattern.compile(patternString);

private String text;
private boolean bouncing = true;
private boolean verified = false;
private String label;

public EmailAddress1() {
super();
}

public static boolean isValidText(String email) {
return (email != null) && VALID_PATTERN.matcher(email).matches();
}


public static void main(String[] args) {
String addy =       "1234567asjdkasdjflaslkdjfkajdfhklsadjfhsdkljfhksadjf8901234@askdjfaskdjbfskldjbgfskdjfksdjfklsadjfksadjfkasdjfkasdjfkasjfksdjfskadjfbsdkjbfasdkjasdkjbfksdjf6789012345678901.com";
if (isValidText(addy)) {
System.out.println("Valid email address.");
} else {
System.out.println("Invalid email address!");
}
}
}

谢谢,瓦伦

4

1 回答 1

1

伙计,这个正则表达式很糟糕!稍微格式化一下看看:

(
 (
  (
   [ \t]*[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+[ \t]*)
  |(
      \"(
    [ \t]*(
            [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                \\[\x01-\x09\x0B\x0C\x0E-\x7F])
          )
    )
      *[ \t]*\")
 )
 +)
?[ \t]*<(
        (
         [ \t]*(
             [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                 \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
             *)
         [ \t]*)
        |(
            \"(
    [ \t]*(
            [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                \\[\x01-\x09\x0B\x0C\x0E-\x7F])
          )
    )
            *[ \t]*\")
        )
@(
        [ \t]*(
            [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
            *)
        [ \t]*|\[(
            [ \t]*[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21-\x5A\x5E-\x7E]|(
                \\[\x01-\x09\x0B\x0C\x0E-\x7F])
            +)
        *[ \t]*\])
>|(
        (
         [ \t]*(
             [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                 \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
             *)
         [ \t]*)
        |(
            \"(
    [ \t]*(
            [\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21\x23-\x5B\x5D-\x7E]|(
                \\[\x01-\x09\x0B\x0C\x0E-\x7F])
          )
    )
            *[ \t]*\")
  )
@(
        [ \t]*(
            [a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+(
                \.[a-zA-Z0-9\!\#\$\%\&\'\*\+\-\/\=\?   \^\_\`\{\|\}\~]+)
            *)
        [ \t]*|\[(
            [ \t]*[\x01-\x08\x0B\x0C\x0E-\x1F\x7F\x21-\x5A\x5E-\x7E]|(
                \\[\x01-\x09\x0B\x0C\x0E-\x7F])
            +)
        *[ \t]*\])

这不是内存泄漏问题。您的正则表达式太复杂而无法轻松匹配。你的程序最终会返回一个有效的结果,但只有在每个有效的可能性都被尝试过之后。

你应该使用Apache Commons EmailValidator,它会更快更可靠。

于 2013-05-26T08:50:37.993 回答