我的 ServletContextListener:
@WebListener
public class RunServlet implements ServletContextListener {
private ScheduledExecutorService scheduler;
@Override
public void contextInitialized(ServletContextEvent event) {
System.out.println("ready");
scheduler = Executors.newScheduledThreadPool(10);
scheduler.execute(new RunThread("http://stackoverflow.com"));
}
@Override
public void contextDestroyed(ServletContextEvent event) {
scheduler.shutdownNow();
System.out.println("removed");
}
}
实现 Runnable 的类是 RunThread(从网页中获取所有链接,点击链接,解析网页并使用 jsoup 和 hibernate 将单词保存到数据库中):
public class RunThread implements Runnable{
private Document html;
private String url;
private static final int threads_num = Runtime.getRuntime().availableProcessors()*4;
private int links = 0;
private int alinks = 0;
public RunThread(String url){
this.url = url;
try {
this.html = Jsoup.connect(url).get();
this.links = html.select("a[href]").size();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void run() {
if(alinks != links){
Elements collectedLinks = html.select("a[href]");
ExecutorService executor = Executors.newFixedThreadPool(threads_num);
for(Element link:collectedLinks){
if(alinks == links) break;
else{
String current = link.attr("abs:href");
if(!current.equals(url) && current.startsWith(url)&& !current.contains("#")){
executor.execute(new RunThread(current));
alinks++;
}
}
}
}
AnalyzePage(html, url);
}
private void AnalyzePage(Document doc,String url){
String text = doc.body().text();
SaveTextToDB(text,url);
}
public void SaveTextToDB(String text, String link){
TreeMap<String, Integer> frequencyMap = new TreeMap<String, Integer>();
StringTokenizer parser =
new StringTokenizer(text.replaceAll("[0-9]+","").replaceAll("[^a-zA-Zа-яА-Я]-[^a-zA-Zа-яА-Я]", " "), " \t\n\r\f.,;:!?%#+№/<←→↓@'\"—«»©“\\(\\)");
while (parser.hasMoreTokens()) {
String currentWord = parser.nextToken();
Integer frequency = frequencyMap.get(currentWord);
if (frequency == null) {
frequency = 0;
}
frequencyMap.put(currentWord, frequency + 1);
}
for (Map.Entry<String,Integer> entry : frequencyMap.entrySet()){
Indexation word = new Indexation();
IndexationPK pk = new IndexationPK();
pk.setLink(link);
pk.setWord(entry.getKey());
word.setFrequency(entry.getValue());
word.setIndexationPK(pk);
IndexationDAO indDAO = new IndexationDAOImpl();
indDAO.AddRecord(word);
}}
}
我收到下一个错误:
java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:129)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:695)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:640)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1195)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:379)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:381)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:364)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:143)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:132)
at com.mstu.service.RunThread.<init>(RunThread.java:35)
at com.mstu.service.RunThread.run(RunThread.java:53)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
Exception in thread "pool-9-thread-1" java.lang.NullPointerException
at com.mstu.service.RunThread.AnalyzePage(RunThread.java:63)
at com.mstu.service.RunThread.run(RunThread.java:59)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:129)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:695)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:640)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1195)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:379)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:381)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:364)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:143)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:132)
at com.mstu.service.RunThread.<init>(RunThread.java:35)
at com.mstu.service.RunThread.run(RunThread.java:53)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
java.net.SocketTimeoutException: Read timed out
怎么了?请帮我。