我有以下三个类:我尝试制作 1 和 2 的例程并使用 tjava 调用主类和 1 和 2 中的方法,但我无法获取这些方法。
1)
package page_scraper;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebClientOptions;
import com.gargoylesoftware.htmlunit.html.FrameWindow;
import com.gargoylesoftware.htmlunit.html.HtmlButtonInput;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlOption;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSelect;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Writer;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import page_scraper.UnitArray;
public class PageScraper {
public void Scrape() throws IOException {
try {
UnitArray object = new UnitArray();
ArrayList<String> unitList = object.getUnitArray();
WebClient webClient = new WebClient();
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
HtmlPage page = (HtmlPage)webClient.getPage("http://www.bmreports.com/servlet/com.logica.neta.bwp_PanBMUData");
List frames = page.getFrames();
HtmlPage page1 = (HtmlPage)((FrameWindow)frames.get(0)).getEnclosedPage();
HtmlTextInput settlementDay = (HtmlTextInput)page1.getHtmlElementById("param5");
HtmlSelect period = (HtmlSelect)page1.getHtmlElementById("param6");
HtmlOption periodOption = period.getOption(1);
HtmlTextInput unitId = (HtmlTextInput)page1.getHtmlElementById("param1");
HtmlButtonInput button = (HtmlButtonInput)page1.getHtmlElementById("go_button");
String outputLocation = String.valueOf(System.getProperty("user.home")) + "/Documents/output.csv";
FileWriter fileWriter = new FileWriter(outputLocation);
String errorLocation = String.valueOf(System.getProperty("user.home")) + "/Documents/error.csv";
FileWriter errorWriter = new FileWriter(errorLocation);
int i = 0;
while (i < unitList.size()) {
int x = 0;
while (x < 365) {
String errorData;
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
Calendar cal = Calendar.getInstance();
cal.add(5, - x);
String dateValue = dateFormat.format(cal.getTime());
System.out.println(dateValue);
settlementDay.setValueAttribute(dateValue);
period.setSelectedAttribute(periodOption, true);
unitId.setValueAttribute(unitList.get(i));
System.out.println(unitList.get(i));
try {
button.click();
HtmlPage page2 = (HtmlPage)((FrameWindow)frames.get(1)).getEnclosedPage();
String pageSource = page2.asXml();
int firstIndex = pageSource.indexOf("csv=") + 38;
int secondIndex = pageSource.indexOf("n\"") + 1;
String csvData = pageSource.substring(firstIndex, secondIndex);
fileWriter.append(csvData);
}
catch (ClassCastException e) {
errorData = String.valueOf(dateValue) + " " + unitList.get(i) + System.getProperty("line.separator");
System.out.println(errorData);
errorWriter.append(errorData);
continue;
}
catch (StringIndexOutOfBoundsException e) {
errorData = String.valueOf(dateValue) + " " + unitList.get(i) + System.getProperty("line.separator");
System.out.println(errorData);
errorWriter.append(errorData);
continue;
}
++x;
}
++i;
}
webClient.close();
fileWriter.close();
errorWriter.close();
}
catch (IOException e) {
e.printStackTrace();
}
}
}
2)
package page_scraper;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
public class UnitArray {
public ArrayList<String> getUnitArray() {
String csvList = "abc,xyz";
ArrayList<String> list = new ArrayList<String>(Arrays.asList(csvList.split(",")));
return list;
}
}
3)
package page_scraper;
import page_scraper.PageScraper;
public class main {
public static void main(String[] args) throws Exception {
PageScraper test = new PageScraper();
test.Scrape();
}
}
我在 Talend 中为上述代码 (1) 和 2)) 制作了例程,然后使用 tjava 调用该方法但无法这样做..我还尝试对所有人使用 tjava 并在每个 tjava 上执行 onSubjob ok。如何在 talend 中调用这些类并调用方法?