0

我连续测量了 4 个物体(A、B、C、D)的长度。

我有 2 个数据帧,即 data1 和 data2,如下所示...

data1包含测量数据,其中长度是随时间测量的。 数据 2包含类别(A、B、C、D)及其测量时间间隔。

data1 =structure(list(Time = c("9/18/2019 15:30", "9/18/2019 15:31", 
                               "9/18/2019 15:32", "9/18/2019 15:33", "9/18/2019 15:34", "9/18/2019 15:35", 
                               "9/18/2019 15:36", "9/18/2019 15:37", "9/18/2019 15:38", "9/18/2019 15:39", 
                               "9/18/2019 15:40", "9/18/2019 15:41", "9/18/2019 15:42", "9/18/2019 15:43", 
                               "9/18/2019 15:44", "9/18/2019 15:45", "9/18/2019 15:46", "9/18/2019 15:47", 
                               "9/18/2019 15:48", "9/18/2019 15:49", "9/18/2019 15:50", "9/18/2019 15:51", 
                               "9/18/2019 15:52", "9/18/2019 15:53", "9/18/2019 15:54", "9/18/2019 15:55", 
                               "9/18/2019 15:56", "9/18/2019 15:57", "9/18/2019 15:58", "9/18/2019 15:59", 
                               "9/18/2019 16:00", "9/18/2019 16:01", "9/18/2019 16:02", "9/18/2019 16:03", 
                               "9/18/2019 16:04", "9/18/2019 16:05", "9/18/2019 16:06", "9/18/2019 16:07", 
                               "9/18/2019 16:08", "9/18/2019 16:09", "9/18/2019 16:10", "9/18/2019 16:11", 
                               "9/18/2019 16:12", "9/18/2019 16:13", "9/18/2019 16:14", "9/18/2019 16:15", 
                               "9/18/2019 16:16", "9/18/2019 16:17"), length = c(36, 95, 45, 
                                                                                 45, 50, 47, 45, 43, 40, 38, 36, 33, 31, 78, 5, 45, 10, 45, 80, 
                                                                                 116, 151, 186, 222, 257, 292, 328, 45, 454, 57, 197, 203, 209, 
                                                                                 215, 221, 227, 233, 239, 45, 65, 85, 105, 45, 45, 15, 5, 78, 
                                                                                 96, 127)), .Names = c("Time", "length"), class = c("tbl_df", 
                                                                                                                                    "tbl", "data.frame"), row.names = c(NA, -48L), spec = structure(list(
                                                                                                                                      cols = structure(list(Time = structure(list(), class = c("collector_character", 
                                                                                                                                                                                               "collector")), length = structure(list(), class = c("collector_double", 
                                                                                                                                                                                                                                                   "collector"))), .Names = c("Time", "length")), default = structure(list(), class = c("collector_guess", 
                                                                                                                                                                                                                                                                                                                                        "collector"))), .Names = c("cols", "default"), class = "col_spec"))

data1$Time = as.POSIXct(strptime(data1$Time, format="%m/%d/%Y %H:%M"))

data2 = structure(list(Class = c("A", "B", "C", "D"), From = c("9/18/2019 15:31", 
                                                       "9/18/2019 15:45", "9/18/2019 15:56", "9/18/2019 16:07"), To = c("9/18/2019 15:40", 
                                                                                                                        "9/18/2019 15:53", "9/18/2019 16:05", "9/18/2019 16:16")), .Names = c("Class", 
                                                                                                                                                                                              "From", "To"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
                                                                                                                                                                                                                                                                     -4L), spec = structure(list(cols = structure(list(Class = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                           "collector")), From = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                                                                             "collector")), To = structure(list(), class = c("collector_character", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                             "collector"))), .Names = c("Class", "From", "To")), default = structure(list(), class = c("collector_guess", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       "collector"))), .Names = c("cols", "default"), class = "col_spec"))


data2$From = as.POSIXct(strptime(data2$From, format="%m/%d/%Y %H:%M"))
data2$To = as.POSIXct(strptime(data2$To, format="%m/%d/%Y %H:%M"))

我想要一个合并的数据框,其中类别的长度在其时间间隔内测量(如下所示)。

在此处输入图像描述

任何建议将不胜感激。如果您需要更多详细信息,请告诉我。

ps 附上的数据是一个玩具数据。实际的 data1 包含 200k 行。我已经提供了 Posixct 时间转换的代码。

4

2 回答 2

1

您可以使用嵌套ifelse()函数来做到这一点:

data1$Class=ifelse(data1$Time>data2$From[1] & data1$Time<data2$To[1],"A",
            ifelse(data1$Time>data2$From[2] & data1$Time<data2$To[2],"B",
            ifelse(data1$Time>data2$From[3] & data1$Time<data2$To[3],"C",
            ifelse(data1$Time>data2$From[4] & data1$Time<data2$To[4],"D",NA))))

不属于 4 个类别中的任何一个的事件的值将是NA

于 2019-09-18T16:43:48.730 回答
1

您可以使用包中的right_join功能dplyr

df <- right_join(data2,data1, by = c("From" =  "Time"))
df <- df %>% select(-From)
head(df,11)

输出:

# A tibble: 11 x 3
   Class To              length
   <chr> <chr>            <dbl>
 1 NA    NA                  36
 2 NA    NA                  95
 3 NA    NA                  45
 4 NA    NA                  45
 5 NA    NA                  50
 6 NA    NA                  47
 7 NA    NA                  45
 8 NA    NA                  43
 9 NA    NA                  40
10 NA    NA                  38
11 A     9/18/2019 15:40     36

如果您只想要匹配项,则可以使用inner_join而不是right_join

df <- inner_join(data2,data1, by = c("From" =  "Time"))
df <- df %>% select(-From)
df

# A tibble: 4 x 3
  Class To              length
  <chr> <chr>            <dbl>
1 A     9/18/2019 15:40     36
2 B     9/18/2019 15:53    257
3 C     9/18/2019 16:05    233
4 D     9/18/2019 16:16     96

于 2019-09-18T16:56:59.073 回答