0

考虑以下 html 文件 a.html

<!-- a.html: top-level page whose only content is a frameset with a single frame that loads frame_a.html. -->
<html>
    <frameset>
       <frame src="frame_a.html">
    </frameset>
</html>

frame_a.html 如下

<!-- frame_a.html: the document loaded into the frame of a.html; its body holds only the text "aaaaaa". -->
<html>
    <body>
       aaaaaa
    </body>
</html>

以下代码:

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.FrameWindow;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Small HtmlUnit demo that loads a frameset page and prints its text twice:
 * once via {@code HtmlPage.asText()} on the top-level page alone, and once via
 * {@link #getText(HtmlPage)}, which also walks into every frame.
 */
public class TestFramset {

    /**
     * Loads the sample page and prints both text renderings to standard output.
     *
     * @param args unused
     * @throws FailingHttpStatusCodeException declared for {@code WebClient.getPage}
     * @throws MalformedURLException if the page URL is malformed
     * @throws IOException if the page cannot be read
     * @throws InterruptedException kept for signature compatibility; nothing here blocks
     */
    public static void main(String[] args)
            throws FailingHttpStatusCodeException, MalformedURLException, IOException, InterruptedException {
        WebClient client = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            client.getOptions().setJavaScriptEnabled(true);
            client.getOptions().setRedirectEnabled(true);
            client.getOptions().setThrowExceptionOnScriptError(true);
            client.getOptions().setCssEnabled(true);
            client.getOptions().setUseInsecureSSL(true);
            client.getOptions().setThrowExceptionOnFailingStatusCode(false);

            HtmlPage page = client.getPage("file:///...a.html");
            System.out.println("page as text will give nothing:" + page.asText());
            System.out.println("recursive function will give:" + getText(page));
        } finally {
            // Release the client's windows even when loading or printing fails.
            // NOTE(review): closeAllWindows() matches the HtmlUnit generation that
            // ships FIREFOX_17; newer releases expose close() instead - confirm on upgrade.
            client.closeAllWindows();
        }
    }

    /**
     * Returns the text of {@code page} followed by the text of every HTML page
     * nested in its frames, visited depth-first in the order
     * {@code page.getFrames()} reports them. No separator is inserted between
     * the individual pieces.
     *
     * @param page the page to start from; must not be {@code null}
     * @return the concatenated text of the page and all nested HTML frame pages
     */
    public static String getText(HtmlPage page) {
        StringBuilder text = new StringBuilder();
        appendText(page, text);
        return text.toString();
    }

    /** Appends the text of {@code page} and, recursively, of its HTML frames to {@code out}. */
    private static void appendText(HtmlPage page, StringBuilder out) {
        out.append(page.asText());
        List<FrameWindow> frames = page.getFrames();
        for (FrameWindow frame : frames) {
            // A frame is not guaranteed to hold an HTML document, so check the type
            // before casting; anything that is not an HtmlPage is skipped instead of
            // failing with a ClassCastException.
            Object enclosed = frame.getEnclosedPage();
            if (enclosed instanceof HtmlPage) {
                appendText((HtmlPage) enclosed, out);
            }
        }
    }

}

将给出输出

page as text will give nothing:

recursive function will give:aaaaaa

我的问题是:page.asText 函数不返回框架(frame)中的文本,这是预期的行为吗?另外,我这种递归获取框架文本的方法是不是最好的做法?

4

0 回答 0