如果数据不是固定宽度的格式，可以用下面这样的代码来解析：
library(purrr)
library(dplyr)
library(stringi)
# Sample input: two station-month records separated by a newline. Each record
# starts with a 21-character header, followed by one reading (plus flag
# letters) per day of the month.
lines <- "BC000068032198109TMAX 232 Q 220 Q 220 Q 244 Q 239 Q 246 Q 270 Q 300 Q 327 Q 279 Q 256 Q 260 Q 289 Q 342 Q 357 Q 359 Q 370 Q 373 Q 367 Q 370 Q 372 Q 357 Q 366 Q 365 Q 355 Q 355 Q 364 Q 343 Q 364 Q 362 Q-9999 \nBC000068032198110TMIN 180 Q 170 I 150 I 130 I 150 I 130 I 160 I 190 I 190 I 185 Q-9999 130 I 130 I 160 I 170 I 140 I 160 I 160 I 160 I 160 I 160 I-9999 190 I 180 I 160 I 165 Q 210 I 180 I-9999 190 I 170 I"

#' Parse one station-month record into one row per day
#'
#' Header layout read by this function: characters 1-11 are the location id,
#' 12-15 the year, 16-17 the month and 18-21 the variable code. Everything
#' from character 22 onwards is scanned for integer readings; the n-th
#' reading is assigned to day n of that month.
#'
#' @param x A single character string holding one record.
#' @return A tibble with columns `location_id`, `date`, `variable` and
#'   `value`. Readings equal to `-9999` become `NA`. Rows whose day does not
#'   exist in the month (e.g. a 31st reading for September) are dropped.
parse_station_month <- function(x) {
  # Start at 22 so the variable code itself is never scanned for digits.
  readings <- substr(x, 22, nchar(x))

  # An optional leading minus followed by digits. Anchoring the minus to the
  # front keeps fused tokens such as "232-9999" apart and ignores stray "-".
  tokens <- stri_extract_all_regex(readings, "-?[[:digit:]]+")[[1]]

  # -9999 is the placeholder for a missing reading; `%in%` is NA-safe.
  tokens[tokens %in% "-9999"] <- NA_character_
  value <- as.numeric(tokens)

  tibble(
    location_id = substr(x, 1, 11),
    date = as.Date(sprintf(
      "%s-%s-%02d",
      substr(x, 12, 15), substr(x, 16, 17), seq_along(value)
    )),
    variable = substr(x, 18, 21),
    value = value
  ) %>%
    filter(!is.na(date)) # impossible calendar days parse to NA; drop them
}

# Split the input into records. The text connection is closed explicitly so
# R does not later warn about an unused open connection.
con <- textConnection(lines)
records <- readLines(con)
close(con)

records %>%
  map(parse_station_month) %>%
  bind_rows()
## # A tibble: 61 × 4
## location_id date variable value
## <chr> <date> <chr> <dbl>
## 1 BC000068032 1981-09-01 TMAX 232
## 2 BC000068032 1981-09-02 TMAX 220
## 3 BC000068032 1981-09-03 TMAX 220
## 4 BC000068032 1981-09-04 TMAX 244
## 5 BC000068032 1981-09-05 TMAX 239
## 6 BC000068032 1981-09-06 TMAX 246
## 7 BC000068032 1981-09-07 TMAX 270
## 8 BC000068032 1981-09-08 TMAX 300
## 9 BC000068032 1981-09-09 TMAX 327
## 10 BC000068032 1981-09-10 TMAX 279
## # ... with 51 more rows
这样应该就能解决问题。