考虑以下 html 文件 a.html
<!-- a.html: outer page; a frameset whose single frame loads frame_a.html -->
<html>
<frameset>
<frame src="frame_a.html">
</frameset>
</html>
frame_a.html 如下
<!-- frame_a.html: the document shown inside the frame; its body holds only the text "aaaaaa" -->
<html>
<body>
aaaaaa
</body>
</html>
以下代码:
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.FrameWindow;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Small HtmlUnit experiment: loads a frameset page and compares the text
 * returned by {@code HtmlPage.asText()} on the top-level page with the text
 * gathered by walking every frame recursively.
 */
public class TestFramset {

    /**
     * Loads {@code a.html}, prints the text of the top-level page, then prints
     * the text collected from the page and all of its (nested) frames.
     *
     * @param args unused
     * @throws FailingHttpStatusCodeException if the server returns a failing status
     *         code and the client is configured to throw on it
     * @throws MalformedURLException if the page URL is not a valid URL
     * @throws IOException if the page cannot be read
     * @throws InterruptedException kept for signature compatibility; not thrown here
     */
    public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException, InterruptedException {
        WebClient client = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            client.getOptions().setJavaScriptEnabled(true);
            client.getOptions().setRedirectEnabled(true);
            client.getOptions().setThrowExceptionOnScriptError(true);
            client.getOptions().setCssEnabled(true);
            client.getOptions().setUseInsecureSSL(true);
            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
            HtmlPage page = client.getPage("file:///...a.html");
            System.out.println("page as text will give nothing:" + page.asText());
            System.out.println("recursive function will give:" + getText(page));
        } finally {
            // Release the windows (and any background JavaScript threads) owned by the client.
            client.closeAllWindows();
        }
    }

    /**
     * Returns the text of {@code page} followed by the text of every HTML page
     * enclosed in its frames, descending into nested frames depth-first.
     * Frames whose content is not an {@link HtmlPage} are skipped.
     *
     * @param page the page to start from
     * @return the concatenated text of the page and its frames
     */
    public static String getText(HtmlPage page) {
        StringBuilder collected = new StringBuilder();
        appendText(page, collected);
        return collected.toString();
    }

    /** Appends the text of {@code page} and, recursively, of its HTML frames to {@code out}. */
    private static void appendText(HtmlPage page, StringBuilder out) {
        out.append(page.asText());
        List<FrameWindow> frames = page.getFrames();
        for (FrameWindow frame : frames) {
            // A frame may hold a non-HTML page (e.g. plain text or binary content);
            // casting blindly would throw ClassCastException, so check first.
            Object enclosed = frame.getEnclosedPage();
            if (enclosed instanceof HtmlPage) {
                appendText((HtmlPage) enclosed, out);
            }
        }
    }
}
运行后将输出：
page as text will give nothing:
recursive function will give:aaaaaa
我的问题是：page.asText 函数不返回框架（frame）中的文本，这是否是预期行为？另外，我这种递归获取框架文本的方法是否是最佳做法？