下面是一种使用 jsoup 的方法：
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parses a local HTML file with jsoup and prints the first four {@code <td>}
 * cells of every table row, each prefixed with its column label.
 */
public class so15933614 {

    /** Labels for the first four {@code <td>} cells of a row, in column order. */
    private static final String[] LABELS = {"First Name", "Last Name", "Date", "City"};

    /**
     * Reads {@code so15933614.html} from the working directory and prints one
     * labelled record per {@code <tr>} element.
     *
     * @param args ignored
     * @throws IOException if the HTML file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = "so15933614.html";
        print("Fetching %s...", path);
        Document doc = Jsoup.parse(new File(path), "UTF-8");

        for (Element row : doc.select("tr")) {
            Elements cells = row.getElementsByTag("td");
            // Rows without all four data cells (e.g. header rows made of <th>)
            // are skipped; indexing them would throw IndexOutOfBoundsException.
            if (cells.size() < LABELS.length) {
                continue;
            }
            print("---------");
            for (int i = 0; i < LABELS.length; i++) {
                print("%s:%s", LABELS[i], cells.get(i).text());
            }
        }
    }

    /**
     * Formats {@code msg} with {@code args} via {@link String#format} and
     * writes the result to standard output followed by a line break.
     *
     * @param msg  format string
     * @param args values substituted into the format string
     */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }
}
输出是:
Fetching so15933614.html...
---------
First Name:john
Last Name:doe
Date:1/1/09
City:cincinnati, OH
---------
First Name:
Last Name:
Date:
City:Atlanta, GA
---------
First Name:john1
Last Name:doe1
Date:1/1/89
City:cincinnati, OH