我还需要一些帮助。我正在尝试获取表格每一行的所有列,但变量 cells 只拿到了 3 个单元格;而我查看该页面的 HTML 时,表格实际上有 7 列。
非常感谢您的帮助。
代码如下:
公共类 BotEditalCapturaProcesoEletronicoDispensaRJ { 私有静态最终字符串 url = "https://www.compras.rj.gov.br/Portal-Siga/ProcessoEletronicoDispensa/buscar.action";
public static void main(String[] args){
disableLogs();
System.out.println("Starting RPA PED ...");
initialProcess();
}
public static void initialProcess(){
// Init process
WebClient client = new WebClient(BrowserVersion.BEST_SUPPORTED);
CookieManager cookieManager = client.getCookieManager();
cookieManager.setCookiesEnabled(true);
client.setCookieManager(cookieManager);
client.getOptions().setPopupBlockerEnabled(true);
client.getOptions().setThrowExceptionOnFailingStatusCode(false);
client.getOptions().setJavaScriptEnabled(true);
client.getOptions().setCssEnabled(false);
client.getOptions().setUseInsecureSSL(true);
client.getOptions().setRedirectEnabled(true);
client.setAjaxController(new NicelyResynchronizingAjaxController());
client.getOptions().setThrowExceptionOnScriptError(false);
try{
System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2");
System.out.println("Getting website...");
// Get the website
HtmlPage webPortal = client.getPage(url);
client.waitForBackgroundJavaScript(5 * 1000);
Set<Cookie> cookies = client.getCookieManager().getCookies();
// Get the status select list and search link
HtmlSelect selectStatus = (HtmlSelect) webPortal.getElementById("andamentoPed");
HtmlAnchor btnSearch = (HtmlAnchor) webPortal.getElementById("pesquisar");
// List the status options
DomNodeList<HtmlElement> optionStatusList = selectStatus.getElementsByTagName("option");
// Iterate over each status option
for(int i = 0; i < optionStatusList.size(); i++){
HtmlOption currentOption = (HtmlOption) optionStatusList.get(i);
// Selecione it's a default text without value, we're going to search in the other options
if(!currentOption.getText().equals("Selecione")){
// Select the current iteration status and click search button
selectStatus.setSelectedAttribute(currentOption, true);
btnSearch.click();
// Wait for the page
client.waitForBackgroundJavaScript(15 * 1000);
Page page = client.getCurrentWindow().getEnclosedPage();
page.initialize();
HtmlPage dataPage;
// Sometimes, the page is returned as TextPage, compare this cases
if(page instanceof TextPage){
System.out.println(currentOption.getText() + " it's a TextPage");
continue;
}
else{
dataPage = (HtmlPage) page;
}
System.out.println(currentOption.getText() + " page: " + dataPage.getBaseURL().toString());
// If the result is a HtmlPage and the URL is distinct to original page we have results in the current status
if(webPortal == dataPage) {
System.out.println("Not results for : " + currentOption.getText());
}
else{
System.out.println("Results for: " + currentOption.getText());
readPEDTable(dataPage, currentOption.getText());
}
// Return to original search page to go for next status
client.getCurrentWindow().getHistory().back();
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
client.getCurrentWindow().getJobManager().removeAllJobs();
client.close();
System.gc();
}
}
public static void readPEDTable(HtmlPage dataPage, String currentStatus){
HtmlTable grid = (HtmlTable) dataPage.getElementById("dataTable");
HtmlTableBody gridBody = grid.getBodies().get(0);
List<HtmlTableRow> gridRows = gridBody.getRows();
for (int i = 0; i < gridRows.size(); i++) {
HtmlTableRow currentRow = gridRows.get(i);
List<HtmlTableCell> cells = currentRow.getCells();
for (int j = 0; j < cells.size(); j++) {
System.out.print(cells.get(j).getTextContent() + " \t\t");
}
System.out.print(currentStatus);
System.out.println("");
}
}
public static void disableLogs(){
Set<String> artifactoryLoggers = new HashSet<>(Arrays.asList("org.apache.http", "com.gargoylesoftware"));
for(String log:artifactoryLoggers) {
ch.qos.logback.classic.Logger artLogger = (ch.qos.logback.classic.Logger) org.slf4j.LoggerFactory.getLogger(log);
artLogger.setLevel(ch.qos.logback.classic.Level.INFO);
artLogger.setAdditive(false);
}
}
}