我使用 RSelenium 来废弃埃克塞特邮政编码的市政税清单:
library(RSelenium)
library(RCurl)
input = 'EX4 2NU'
appURL <- "http://cti.voa.gov.uk/cti/"
RSelenium::startServer()
remDr <- remoteDriver()
remDr$open()
Sys.sleep(5)
remDr$navigate(appURL)
search.form <- remDr$findElement(using = "xpath", "//*[@id='txtPostCode']")
search.form$sendKeysToElement(list(input, key = "enter"))
doc <- remDr$getPageSource()
tbl = xpathSApply(htmlParse(doc[[1]]),'//tbody')
temp1 = readHTMLTable(tbl[[1]],header=F)
v = length(xpathSApply(htmlParse(doc[[1]]),'//a[@class="next"]'))
while (v != 0) {
nextpage <- remDr$findElement(using = "xpath", "//*[@class = 'next']")
nextpage$clickElement()
doc <- remDr$getPageSource()
tbl = xpathSApply(htmlParse(doc[[1]]),'//tbody')
temp2 = readHTMLTable(tbl[[1]],header=F)
temp1 = rbind(temp1,temp2)
v = length(xpathSApply(htmlParse(doc[[1]]),'//a[@class="next"]'))
}
finaltable = temp1
希望对您有所帮助。有了这个,您可以废弃多个页面数据。