2

我有时间序列(日期时间、实例、值),值中有一些 NA。如果所有实例的值 - 同一日期时间的 NA,则意味着数据收集存在差距。我需要强调这些时期。

我的示例脚本和数据:

library(tidyr)
library(ggplot2)


example.data1 <- data.frame( Instance = rep("A",11),
                            datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                            Value = c(0,1,2,3,4,5,6,NA,NA,9,10)
)   

example.data2 <- data.frame( Instance = rep("B",11),
                             datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                             Value = c(1,2,NA,4,5,6,7,NA,NA,10,11)
)   

example.data3 <- data.frame( Instance = rep("C",11),
                             datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                             Value = c(2,3,4,5,NA,7,8,NA,NA,11,12)
)   

example.data <- bind_rows(example.data1, example.data2, example.data3)

ggplot (data = example.data, aes(x=datetime,y=Value, color = Instance)) + 
    geom_line(size = 1.2) +
    theme_bw()

我的结果图:

在此处输入图像描述

我真正需要的:

在此处输入图像描述

如何达到?

UPD。

代码是下面的答案无法正常工作。看那个:

example.data.gap <- example.data %>%
    group_by(datetime) %>%
    summarise(is_gap = all(is.na(Value))) %>%
    # Start and End 
    mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
    filter(is_gap)

结果是 2 个重叠间隔而不是 1 个:

# A tibble: 2 x 4
  datetime            is_gap xmin                xmax               
  <dttm>              <lgl>  <dttm>              <dttm>             
1 2020-12-26 10:01:45 TRUE   2020-12-26 10:01:30 2020-12-26 10:02:00
2 2020-12-26 10:02:00 TRUE   2020-12-26 10:01:45 2020-12-26 10:02:15

图片 - 如果我们使用 alpha,我们可以看到重叠:

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
    geom_line(size = 1.2) +
    geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
    theme_bw()

在此处输入图像描述

4

3 回答 3

4

轻微改装:

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  summarise(is_gap = all(is.na(Value)), .groups = "drop") %>%
  mutate(
    grp = data.table::rleid(is_gap),
    prevtime = lag(datetime),
    nexttime = lead(datetime)
  ) %>%
  filter(is_gap) %>%
  group_by(grp) %>%
  summarize(xmin = min(prevtime), xmax = max(nexttime), .groups = "drop")

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
  geom_line(size = 1.2) +
  geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
  theme_bw()

在此处输入图像描述

如果您尚未安装,则(仅一个向量,不像 ) 那样可扩展data.table的替代品是:rleiddata.table::rleid

my_rleid <- function(x) { r <- rle(x)$lengths; rep(seq_along(r), times = r); }
于 2021-12-23T13:54:58.793 回答
3

一种选择是创建一个仅包含间隙以及间隙的开始和结束的数据框,并geom_rect用于“突出显示”间隙:

library(dplyr)
library(ggplot2)

example.data <- bind_rows(example.data1, example.data2, example.data3)

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  summarise(is_gap = all(is.na(Value))) %>%
  # Start and End 
  mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
  filter(is_gap)

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
  geom_line(size = 1.2) +
  geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", inherit.aes = FALSE) +
  theme_bw()

于 2021-12-23T11:52:25.280 回答
2

基于 Stefan 的想法,但ggforce::geom_mark_rect改为使用。需要更少的数据准备。

你可以玩弄宽度,但我有点喜欢它不能填补整个空白

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  filter(all(is.na(Value)))

ylims<- range(example.data$Value, na.rm = TRUE)

ggplot(data = example.data, aes(x = datetime, y = Value)) +
  geom_line(size = 1.2, aes(color = Instance)) +
  ggforce::geom_mark_rect(data = example.data.gap, aes(x = datetime, fill = is.na(Value), 
                          y = seq(ylims[1], ylims[2], length = nrow(example.data.gap))))

在此处输入图像描述

于 2021-12-23T14:08:57.753 回答