这就是我会做的:
library(dplyr)
library(xml2)
library(rvest)
library(tidyr)
library(purrr)
# Read the HTML tables found at `x` and combine them into one tibble.
#
# x: a single string handed straight to read_html() (the script below
#    passes page URLs).
#
# Returns the tibble built from the flattened html_table() result. If any
# step fails (e.g. the page cannot be fetched or parsed), a warning naming
# the input is raised and NULL is returned, so that a later bind_rows()
# simply skips the failed page instead of choking on a "try-error" object.
times <- function(x) {
  tryCatch(
    x %>%
      read_html() %>%
      # header = FALSE keeps every row as data, giving default X1, X2, ...
      # column names
      html_table(header = FALSE) %>%
      # NOTE(review): purrr::flatten() is marked superseded in recent purrr
      # releases; list_flatten() is presumably not a drop-in replacement
      # here because the list elements are data frames -- confirm before
      # migrating.
      flatten() %>%
      as_tibble(),
    error = function(e) {
      warning(
        "Failed to read tables from ", x, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}
# Pages to scrape.
urls <- c(
  "https://www.life.church/edmond/",
  "https://www.life.church/fortworth/"
)

# Scrape each page, stack the results with the source URL as an id column,
# then reshape the single X1 column into Day / Time pairs.
urls %>%
  # Name each URL after itself so bind_rows() can emit it as the id column.
  set_names() %>%
  map(times) %>%
  bind_rows(.id = "URL") %>%
  # Zero-width lookahead: only entries starting with a non-digit are split,
  # which moves them out of Time and into Day.
  separate(X1, into = c("Time", "Day"), sep = "(?=^\\D)") %>%
  # Carry each Day value down onto the rows that follow it.
  fill(Day) %>%
  # Drop the rows that only carried a Day label.
  filter(Time != "") %>%
  select(URL, Day, Time)
# A tibble: 16 x 3
URL Day Time
<chr> <chr> <chr>
1 https://www.life.church/edmond/ Saturday 4:00 PM
2 https://www.life.church/edmond/ Saturday 5:30 PM
3 https://www.life.church/edmond/ Sunday 8:30 AM
4 https://www.life.church/edmond/ Sunday 10:00 AM
5 https://www.life.church/edmond/ Sunday 11:30 AM
6 https://www.life.church/edmond/ Sunday 1:00 PM
7 https://www.life.church/edmond/ Sunday 4:00 PM
8 https://www.life.church/edmond/ Sunday 5:30 PM
9 https://www.life.church/edmond/ Wednesday 7:00 PM
10 https://www.life.church/fortworth/ Saturday 4:00 PM
11 https://www.life.church/fortworth/ Saturday 5:30 PM
12 https://www.life.church/fortworth/ Sunday 8:30 AM
13 https://www.life.church/fortworth/ Sunday 10:00 AM
14 https://www.life.church/fortworth/ Sunday 11:30 AM
15 https://www.life.church/fortworth/ Sunday 1:00 PM
16 https://www.life.church/fortworth/ Wednesday 7:00 PM
`separate()` 使用前瞻(lookahead)正则表达式,将不以数字开头的条目拆分到新列 `Day` 中。